Drop Cloog support

This commit drops the Cloog support for Polly. The scripts and
  documentation are changed to only use isl as a prerequisite. In the code
  all Cloog-specific parts have been removed and all relevant tests were
  ported to the isl backend when it was created.

llvm-svn: 223141
This commit is contained in:
Johannes Doerfert 2014-12-02 19:26:58 +00:00
parent 1e6e41bdc5
commit 305fed96e6
132 changed files with 184 additions and 8031 deletions

View File

@ -89,16 +89,6 @@ if(PLUTO_FOUND)
set(POLLY_LINK_LIBS ${POLLY_LINK_LIBS} ${PLUTO_LIBRARY})
endif(PLUTO_FOUND)
option(POLLY_USE_CLOOG "Build Polly with Cloog support" ON)
if(POLLY_USE_CLOOG)
# Build Cloog support in Polly (default is for cloog-isl).
FIND_PACKAGE(Cloog)
FIND_PACKAGE(Gmp REQUIRED)
if(CLOOG_FOUND)
set(POLLY_LINK_LIBS ${POLLY_LINK_LIBS} ${CLOOG_LIBRARY})
endif(CLOOG_FOUND)
endif(POLLY_USE_CLOOG)
if(GMP_FOUND)
set(POLLY_LINK_LIBS ${POLLY_LINK_LIBS} ${GMP_LIBRARY})
endif(GMP_FOUND)
@ -111,9 +101,6 @@ if (POLLY_ENABLE_GPGPU_CODEGEN)
set(GPU_CODEGEN TRUE)
endif(POLLY_ENABLE_GPGPU_CODEGEN)
if (CLOOG_FOUND)
INCLUDE_DIRECTORIES( ${CLOOG_INCLUDE_DIR} )
endif(CLOOG_FOUND)
if (PLUTO_FOUND)
INCLUDE_DIRECTORIES( ${PLUTO_INCLUDE_DIR} )
endif(PLUTO_FOUND)

View File

@ -29,13 +29,12 @@ endif
POLLY_CXXFLAGS += "-fno-rtti -fno-exceptions"
PLUTO_FOUND := @pluto_found@
CLOOG_FOUND := @cloog_found@
CUDALIB_FOUND := @cuda_found@
# Set include directroys
POLLY_INC := @gmp_inc@ @isl_inc@ @cloog_inc@ @cuda_inc@\
POLLY_INC := @gmp_inc@ @isl_inc@ @cuda_inc@\
@pluto_inc@ -I$(POLLY_SRC_ROOT)/lib/JSON/include
POLLY_LD := @gmp_ld@ @isl_ld@ @cloog_ld@ @cuda_ld@ @pluto_ld@
POLLY_LD := @gmp_ld@ @isl_ld@ @cuda_ld@ @pluto_ld@
POLLY_LIB := @gmp_lib@ @isl_lib@ @cloog_lib@ @cuda_lib@ @pluto_lib@
POLLY_LIB := @gmp_lib@ @isl_lib@ @cuda_lib@ @pluto_lib@

View File

@ -37,8 +37,6 @@ if test ${srcdir} != "." ; then
fi
fi
AC_DEFINE([CLOOG_INT_GMP], [1], [Use gmp for isl])
dnl **************************************************************************
dnl * Determine which system we are building on
dnl **************************************************************************
@ -83,14 +81,6 @@ CXXFLAGS="$CXXFLAGS"
find_lib_and_headers([isl], [isl/val.h], [isl], [required])
CXXFLAGS=$saved_CXXFLAGS
dnl Check that we have cloog.
saved_CXXFLAGS=$CXXFLAGS
CXXFLAGS="$CXXFLAGS $gmp_inc $isl_inc"
find_lib_and_headers([cloog], [cloog/isl/cloog.h], [cloog-isl])
CXXFLAGS=$saved_CXXFLAGS
AS_IF([test "x$cloog_found" = "xyes"],
[AC_DEFINE([CLOOG_FOUND],[1],[Define if cloog found])])
dnl Check that we have libpluto.
saved_CXXFLAGS=$CXXFLAGS
CXXFLAGS="$CXXFLAGS $gmp_inc $isl_inc"

View File

@ -1,19 +0,0 @@
FIND_PATH(CLOOG_INCLUDE_DIR cloog/isl/cloog.h)
FIND_LIBRARY(CLOOG_LIBRARY NAMES cloog-isl)
IF (CLOOG_INCLUDE_DIR AND CLOOG_LIBRARY)
SET(CLOOG_FOUND TRUE)
ENDIF (CLOOG_INCLUDE_DIR AND CLOOG_LIBRARY)
IF (CLOOG_FOUND)
IF (NOT CLOOG_FIND_QUIETLY)
MESSAGE(STATUS "Found Cloog: ${CLOOG_LIBRARY}")
ENDIF (NOT CLOOG_FIND_QUIETLY)
ELSE (CLOOG_FOUND)
IF (CLOOG_FIND_REQUIRED)
MESSAGE(FATAL_ERROR "Could not find Cloog")
ENDIF (CLOOG_FIND_REQUIRED)
ENDIF (CLOOG_FOUND)

108
polly/configure vendored
View File

@ -595,10 +595,6 @@ pluto_ld
pluto_lib
pluto_inc
pluto_found
cloog_ld
cloog_lib
cloog_inc
cloog_found
isl_ld
isl_lib
isl_inc
@ -661,7 +657,6 @@ with_llvmsrc
with_llvmobj
with_gmp
with_isl
with_cloog
with_pluto
enable_polly_gpu_codegen
with_cuda
@ -1294,7 +1289,6 @@ Optional Packages:
--with-llvmobj Location of LLVM Object Code
--with-gmp prefix of gmp
--with-isl prefix of isl
--with-cloog prefix of cloog
--with-pluto prefix of pluto
--with-cuda prefix of cuda
@ -1897,7 +1891,6 @@ if test ${srcdir} != "." ; then
fi
$as_echo "#define CLOOG_INT_GMP 1" >>confdefs.h
@ -2578,96 +2571,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
CXXFLAGS=$saved_CXXFLAGS
saved_CXXFLAGS=$CXXFLAGS
CXXFLAGS="$CXXFLAGS $gmp_inc $isl_inc"
ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
OLD_CXXFLAGS=$CXXFLAGS;
OLD_LDFLAGS=$LDFLAGS;
OLD_LIBS=$LIBS;
LIBS="$LIBS -lcloog-isl";
# Get include path and lib path
# Check whether --with-cloog was given.
if test "${with_cloog+set}" = set; then :
withval=$with_cloog; given_inc_path="$withval/include"; CXXFLAGS="-I$given_inc_path $CXXFLAGS";
given_lib_path="$withval/lib"; LDFLAGS="-L$given_lib_path $LDFLAGS"
else
given_inc_path=inc_not_give_cloog;
given_lib_path=lib_not_give_cloog
fi
# Check for library and headers works
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cloog: cloog/isl/cloog.h in $given_inc_path, and libcloog-isl in $given_lib_path" >&5
$as_echo_n "checking for cloog: cloog/isl/cloog.h in $given_inc_path, and libcloog-isl in $given_lib_path... " >&6; }
# try to compile a file that includes a header of the library
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <cloog/isl/cloog.h>
int
main ()
{
;
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_link "$LINENO"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5
$as_echo "ok" >&6; }
cloog_found="yes"
if test "x$given_inc_path" != "xinc_not_give_cloog"; then :
cloog_inc="-I$given_inc_path"
fi
cloog_lib="-lcloog-isl"
if test "x$given_lib_path" != "xlib_not_give_cloog"; then :
cloog_ld="-L$given_lib_path"
fi
else
if test "x" = "xrequired"; then :
as_fn_error $? "cloog required but not found" "$LINENO" 5
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
$as_echo "not found" >&6; }
fi
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
# reset original CXXFLAGS
CXXFLAGS=$OLD_CXXFLAGS
LDFLAGS=$OLD_LDFLAGS;
LIBS=$OLD_LIBS
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
CXXFLAGS=$saved_CXXFLAGS
if test "x$cloog_found" = "xyes"; then :
$as_echo "#define CLOOG_FOUND 1" >>confdefs.h
fi
saved_CXXFLAGS=$CXXFLAGS
CXXFLAGS="$CXXFLAGS $gmp_inc $isl_inc"
ac_ext=cpp
@ -2755,17 +2658,6 @@ $as_echo "#define PLUTO_FOUND 1" >>confdefs.h
fi
ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
OLD_CXXFLAGS=$CXXFLAGS;
OLD_LDFLAGS=$LDFLAGS;
OLD_LIBS=$LIBS;
# Check whether --enable-polly_gpu_codegen was given.
if test "${enable_polly_gpu_codegen+set}" = set; then :
enableval=$enable_polly_gpu_codegen;

View File

@ -1,91 +0,0 @@
//===- CLooG.h - CLooG interface --------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// CLooG[1] interface.
//
// The CLooG interface takes a Scop and generates a CLooG AST (clast). This
// clast can either be returned directly or it can be pretty printed to stdout.
//
// A typical clast output looks like this:
//
// for (c2 = max(0, ceild(n + m, 2); c2 <= min(511, floord(5 * n, 3)); c2++) {
// bb2(c2);
// }
//
// [1] http://www.cloog.org/ - The Chunky Loop Generator
//
//===----------------------------------------------------------------------===//
#ifndef POLLY_CLOOG_H
#define POLLY_CLOOG_H
#include "polly/Config/config.h"
#ifdef CLOOG_FOUND
#include "polly/ScopPass.h"
#define CLOOG_INT_GMP 1
#include "cloog/cloog.h"
struct clast_name;
namespace llvm {
class raw_ostream;
}
namespace polly {
class Scop;
class Cloog;
class CloogInfo : public ScopPass {
Cloog *C;
Scop *scop;
public:
static char ID;
CloogInfo() : ScopPass(ID), C(0) {}
/// Write a .cloog input file
void dump(FILE *F);
/// Print a source code representation of the program.
void pprint(llvm::raw_ostream &OS);
/// Create the CLooG AST from this program.
const struct clast_root *getClast();
bool runOnScop(Scop &S);
void printScop(llvm::raw_ostream &OS) const;
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual void releaseMemory();
};
// Visitor class for clasts.
// Only 'visitUser' has to be implemented by subclasses; the default
// implementations of the other methods traverse the clast recursively.
class ClastVisitor {
public:
virtual void visit(const clast_stmt *stmt);
virtual void visitAssignment(const clast_assignment *stmt);
virtual void visitBlock(const clast_block *stmt);
virtual void visitFor(const clast_for *stmt);
virtual void visitGuard(const clast_guard *stmt);
virtual void visitUser(const clast_user_stmt *stmt) = 0;
virtual ~ClastVisitor() {}
};
}
namespace llvm {
class PassRegistry;
void initializeCloogInfoPass(llvm::PassRegistry &);
}
#endif /* CLOOG_FOUND */
#endif /* POLLY_CLOOG_H */

View File

@ -27,13 +27,7 @@ enum VectorizerChoice {
};
extern VectorizerChoice PollyVectorizerChoice;
enum CodeGenChoice {
#ifdef CLOOG_FOUND
CODEGEN_CLOOG,
#endif
CODEGEN_ISL,
CODEGEN_NONE
};
enum CodeGenChoice { CODEGEN_ISL, CODEGEN_NONE };
extern CodeGenChoice PollyCodeGenChoice;
/// @brief Flag to turn on/off annotation of alias scopes.

View File

@ -1,11 +1,5 @@
/* include/polly/Config/config.h.in. Generated from autoconf/configure.ac by autoheader. */
/* Define if cloog found */
#undef CLOOG_FOUND
/* Use gmp for isl */
#undef CLOOG_INT_GMP
/* Define if cudalib found */
#undef CUDALIB_FOUND

View File

@ -26,11 +26,6 @@ class RegionPass;
}
namespace polly {
#ifdef CLOOG_FOUND
llvm::Pass *createCloogExporterPass();
llvm::Pass *createCloogInfoPass();
llvm::Pass *createCodeGenerationPass();
#endif
llvm::Pass *createCodePreparationPass();
llvm::Pass *createDeadCodeElimPass();
llvm::Pass *createDependencesPass();
@ -66,11 +61,6 @@ struct PollyForcePassLinking {
if (std::getenv("bar") != (char *)-1)
return;
#ifdef CLOOG_FOUND
polly::createCloogExporterPass();
polly::createCloogInfoPass();
polly::createCodeGenerationPass();
#endif
polly::createCodePreparationPass();
polly::createDeadCodeElimPass();
polly::createDependencesPass();
@ -97,9 +87,6 @@ struct PollyForcePassLinking {
namespace llvm {
class PassRegistry;
#ifdef CLOOG_FOUND
void initializeCodeGenerationPass(llvm::PassRegistry &);
#endif
void initializeCodePreparationPass(llvm::PassRegistry &);
void initializeDeadCodeElimPass(llvm::PassRegistry &);
void initializeIndependentBlocksPass(llvm::PassRegistry &);

View File

@ -207,14 +207,6 @@ ScopDetection::ScopDetection() : FunctionPass(ID) {
"accesses are enabled.\n");
PollyUseRuntimeAliasChecks = false;
}
#ifdef CLOOG_FOUND
if (PollyCodeGenChoice == CODEGEN_CLOOG) {
DEBUG(errs() << "WARNING: We disable runtime alias checks as the cloog "
"code generation cannot emit them.\n");
PollyUseRuntimeAliasChecks = false;
}
#endif
}
template <class RR, typename... Args>

View File

@ -10,12 +10,6 @@ set(POLLY_JSON_FILES
JSON/json_writer.cpp
)
if (CLOOG_FOUND)
set(CLOOG_FILES
CodeGen/Cloog.cpp
CodeGen/CodeGeneration.cpp)
endif (CLOOG_FOUND)
set(ISL_CODEGEN_FILES
CodeGen/IslAst.cpp
CodeGen/IslExprBuilder.cpp
@ -35,7 +29,6 @@ add_polly_library(Polly
Analysis/ScopPass.cpp
Analysis/TempScopInfo.cpp
CodeGen/BlockGenerators.cpp
${CLOOG_FILES}
${ISL_CODEGEN_FILES}
CodeGen/LoopGenerators.cpp
CodeGen/IRBuilder.cpp

View File

@ -1,365 +0,0 @@
//===- Cloog.cpp - Cloog interface ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Cloog[1] interface.
//
// The Cloog interface takes a Scop and generates a Cloog AST (clast). This
// clast can either be returned directly or it can be pretty printed to stdout.
//
// A typical clast output looks like this:
//
// for (c2 = max(0, ceild(n + m, 2); c2 <= min(511, floord(5 * n, 3)); c2++) {
// bb2(c2);
// }
//
// [1] http://www.cloog.org/ - The Chunky Loop Generator
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/Cloog.h"
#ifdef CLOOG_FOUND
#include "polly/LinkAllPasses.h"
#include "polly/ScopInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "cloog/isl/domain.h"
#include "cloog/isl/cloog.h"
#include <unistd.h>
using namespace llvm;
using namespace polly;
#define DEBUG_TYPE "polly-cloog"
namespace polly {
class Cloog {
Scop *S;
CloogOptions *Options;
CloogState *State;
clast_stmt *ClastRoot;
void buildCloogOptions();
CloogUnionDomain *buildCloogUnionDomain();
CloogInput *buildCloogInput();
public:
Cloog(Scop *Scop);
~Cloog();
/// Write a .cloog input file
void dump(FILE *F);
/// Print a source code representation of the program.
void pprint(llvm::raw_ostream &OS);
/// Create the Cloog AST from this program.
clast_root *getClast();
};
Cloog::Cloog(Scop *Scop) : S(Scop) {
State = cloog_isl_state_malloc(Scop->getIslCtx());
buildCloogOptions();
ClastRoot = cloog_clast_create_from_input(buildCloogInput(), Options);
}
Cloog::~Cloog() {
cloog_clast_free(ClastRoot);
cloog_options_free(Options);
cloog_state_free(State);
}
// Create a FILE* write stream and get the output to it written
// to a std::string.
class FileToString {
int FD[2];
FILE *input;
static const int BUFFERSIZE = 20;
char buf[BUFFERSIZE + 1];
public:
FileToString() {
pipe(FD);
input = fdopen(FD[1], "w");
}
~FileToString() {
close(FD[0]);
// close(FD[1]);
}
FILE *getInputFile() { return input; }
void closeInput() {
fclose(input);
close(FD[1]);
}
std::string getOutput() {
std::string output;
int readSize;
while (true) {
readSize = read(FD[0], &buf, BUFFERSIZE);
if (readSize <= 0)
break;
output += std::string(buf, readSize);
}
return output;
}
};
/// Write .cloog input file.
void Cloog::dump(FILE *F) {
CloogInput *Input = buildCloogInput();
cloog_input_dump_cloog(F, Input, Options);
cloog_input_free(Input);
}
/// Print a source code representation of the program.
void Cloog::pprint(raw_ostream &OS) {
FileToString *Output = new FileToString();
clast_pprint(Output->getInputFile(), ClastRoot, 0, Options);
Output->closeInput();
OS << Output->getOutput();
delete (Output);
}
/// Create the Cloog AST from this program.
clast_root *Cloog::getClast() { return (clast_root *)ClastRoot; }
void Cloog::buildCloogOptions() {
Options = cloog_options_malloc(State);
Options->quiet = 1;
Options->strides = 1;
Options->save_domains = 1;
Options->noscalars = 1;
// Compute simple hulls to reduce code generation time.
Options->sh = 1;
// The last loop depth to optimize should be the last scattering dimension.
// CLooG by default will continue to split the loops even after the last
// scattering dimension. This splitting is problematic for the schedules
// calculated by the PoCC/isl/Pluto optimizer. Such schedules contain may
// not be fully defined, but statements without dependences may be mapped
// to the same exeuction time. For such schedules, continuing to split
// may lead to a larger set of if-conditions in the innermost loop.
Options->l = 0;
}
CloogUnionDomain *Cloog::buildCloogUnionDomain() {
CloogUnionDomain *DU = cloog_union_domain_alloc(S->getNumParams());
for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
ScopStmt *Stmt = *SI;
CloogScattering *Scattering;
CloogDomain *Domain;
Scattering = cloog_scattering_from_isl_map(Stmt->getScattering());
Domain = cloog_domain_from_isl_set(Stmt->getDomain());
std::string entryName = Stmt->getBaseName();
DU = cloog_union_domain_add_domain(DU, entryName.c_str(), Domain,
Scattering, Stmt);
}
return DU;
}
CloogInput *Cloog::buildCloogInput() {
// XXX: We do not copy the context of the scop, but use an unconstrained
// context. This 'hack' is necessary as the context may contain bounds
// on parameters such as [n] -> {:0 <= n < 2^32}. Those large
// integers will cause CLooG to construct a clast that contains
// expressions that include these large integers. Such expressions can
// possibly not be evaluated correctly with i64 types. The cloog
// based code generation backend, however, can not derive types
// automatically and just assumes i64 types. Hence, it will break or
// generate incorrect code.
// This hack does not remove all possibilities of incorrectly generated
// code, but it is ensures that for most problems the problems do not
// show up. The correct solution, will be to automatically derive the
// minimal types for each expression. This could be added to CLooG and it
// will be available in the isl based code generation.
isl_set *EmptyContext = isl_set_universe(S->getParamSpace());
CloogDomain *Context = cloog_domain_from_isl_set(EmptyContext);
CloogUnionDomain *Statements = buildCloogUnionDomain();
isl_set *ScopContext = S->getContext();
for (unsigned i = 0; i < isl_set_dim(ScopContext, isl_dim_param); i++) {
isl_id *id = isl_set_get_dim_id(ScopContext, isl_dim_param, i);
Statements = cloog_union_domain_set_name(Statements, CLOOG_PARAM, i,
isl_id_get_name(id));
isl_id_free(id);
}
isl_set_free(ScopContext);
CloogInput *Input = cloog_input_alloc(Context, Statements);
return Input;
}
void ClastVisitor::visit(const clast_stmt *stmt) {
if (CLAST_STMT_IS_A(stmt, stmt_root))
assert(false && "No second root statement expected");
else if (CLAST_STMT_IS_A(stmt, stmt_ass))
return visitAssignment((const clast_assignment *)stmt);
else if (CLAST_STMT_IS_A(stmt, stmt_user))
return visitUser((const clast_user_stmt *)stmt);
else if (CLAST_STMT_IS_A(stmt, stmt_block))
return visitBlock((const clast_block *)stmt);
else if (CLAST_STMT_IS_A(stmt, stmt_for))
return visitFor((const clast_for *)stmt);
else if (CLAST_STMT_IS_A(stmt, stmt_guard))
return visitGuard((const clast_guard *)stmt);
if (stmt->next)
visit(stmt->next);
}
void ClastVisitor::visitAssignment(const clast_assignment *stmt) {}
void ClastVisitor::visitBlock(const clast_block *stmt) { visit(stmt->body); }
void ClastVisitor::visitFor(const clast_for *stmt) { visit(stmt->body); }
void ClastVisitor::visitGuard(const clast_guard *stmt) { visit(stmt->then); }
} // End namespace polly.
namespace {
struct CloogExporter : public ScopPass {
static char ID;
Scop *S;
explicit CloogExporter() : ScopPass(ID) {}
std::string getFileName(Region *R) const;
virtual bool runOnScop(Scop &S);
void getAnalysisUsage(AnalysisUsage &AU) const;
};
}
std::string CloogExporter::getFileName(Region *R) const {
std::string FunctionName = R->getEntry()->getParent()->getName();
std::string ExitName, EntryName;
raw_string_ostream ExitStr(ExitName);
raw_string_ostream EntryStr(EntryName);
R->getEntry()->printAsOperand(EntryStr, false);
EntryStr.str();
if (R->getExit()) {
R->getExit()->printAsOperand(ExitStr, false);
ExitStr.str();
} else
ExitName = "FunctionExit";
std::string RegionName = EntryName + "---" + ExitName;
std::string FileName = FunctionName + "___" + RegionName + ".cloog";
return FileName;
}
char CloogExporter::ID = 0;
bool CloogExporter::runOnScop(Scop &S) {
Region &R = S.getRegion();
CloogInfo &C = getAnalysis<CloogInfo>();
std::string FunctionName = R.getEntry()->getParent()->getName();
std::string Filename = getFileName(&R);
errs() << "Writing Scop '" << R.getNameStr() << "' in function '"
<< FunctionName << "' to '" << Filename << "'...\n";
FILE *F = fopen(Filename.c_str(), "w");
C.dump(F);
fclose(F);
return false;
}
void CloogExporter::getAnalysisUsage(AnalysisUsage &AU) const {
// Get the Common analysis usage of ScopPasses.
ScopPass::getAnalysisUsage(AU);
AU.addRequired<CloogInfo>();
}
static RegisterPass<CloogExporter> A("polly-export-cloog",
"Polly - Export the Cloog input file"
" (Writes a .cloog file for each Scop)");
llvm::Pass *polly::createCloogExporterPass() { return new CloogExporter(); }
/// Write a .cloog input file
void CloogInfo::dump(FILE *F) { C->dump(F); }
/// Print a source code representation of the program.
void CloogInfo::pprint(llvm::raw_ostream &OS) { C->pprint(OS); }
/// Create the Cloog AST from this program.
const struct clast_root *CloogInfo::getClast() { return C->getClast(); }
void CloogInfo::releaseMemory() {
if (C) {
delete C;
C = 0;
}
}
bool CloogInfo::runOnScop(Scop &S) {
if (C)
delete C;
scop = &S;
C = new Cloog(&S);
Function *F = S.getRegion().getEntry()->getParent();
(void)F;
DEBUG(dbgs() << ":: " << F->getName());
DEBUG(dbgs() << " : " << S.getRegion().getNameStr() << "\n");
DEBUG(C->pprint(dbgs()));
return false;
}
void CloogInfo::printScop(raw_ostream &OS) const {
Function *function = scop->getRegion().getEntry()->getParent();
OS << function->getName() << "():\n";
C->pprint(OS);
}
void CloogInfo::getAnalysisUsage(AnalysisUsage &AU) const {
// Get the Common analysis usage of ScopPasses.
ScopPass::getAnalysisUsage(AU);
}
char CloogInfo::ID = 0;
Pass *polly::createCloogInfoPass() { return new CloogInfo(); }
INITIALIZE_PASS_BEGIN(CloogInfo, "polly-cloog", "Execute Cloog code generation",
false, false);
INITIALIZE_PASS_DEPENDENCY(ScopInfo);
INITIALIZE_PASS_END(CloogInfo, "polly-cloog", "Execute Cloog code generation",
false, false)
#endif // CLOOG_FOUND

File diff suppressed because it is too large Load Diff

View File

@ -16,11 +16,6 @@ LD.Flags += $(POLLY_LD) $(POLLY_LIB)
include $(LEVEL)/Makefile.config
# Enable optional source files
ifeq ($(CLOOG_FOUND), yes)
CLOOG_FILES= CodeGen/Cloog.cpp \
CodeGen/CodeGeneration.cpp
endif
ifeq ($(GPU_CODEGEN), yes)
GPGPU_CODEGEN_FILES= CodeGen/PTXGenerator.cpp
endif
@ -62,7 +57,6 @@ SOURCES= Polly.cpp \
Transform/ScheduleOptimizer.cpp \
${GPGPU_FILES} \
${ISL_CODEGEN_FILES} \
${CLOOG_FILES} \
${POLLY_JSON_FILES} \
${POLLY_PLUTO_FILES}

View File

@ -21,7 +21,6 @@
#include "polly/RegisterPasses.h"
#include "polly/Canonicalization.h"
#include "polly/CodeGen/Cloog.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/Dependences.h"
#include "polly/LinkAllPasses.h"
@ -68,12 +67,9 @@ static cl::opt<OptimizerChoice> Optimizer(
CodeGenChoice polly::PollyCodeGenChoice;
static cl::opt<CodeGenChoice, true> XCodeGenerator(
"polly-code-generator", cl::desc("Select the code generator"),
cl::values(
#ifdef CLOOG_FOUND
clEnumValN(CODEGEN_CLOOG, "cloog", "CLooG"),
#endif
clEnumValN(CODEGEN_ISL, "isl", "isl code generator"),
clEnumValN(CODEGEN_NONE, "none", "no code generation"), clEnumValEnd),
cl::values(clEnumValN(CODEGEN_ISL, "isl", "isl code generator"),
clEnumValN(CODEGEN_NONE, "none", "no code generation"),
clEnumValEnd),
cl::Hidden, cl::location(PollyCodeGenChoice), cl::init(CODEGEN_ISL),
cl::ZeroOrMore, cl::cat(PollyCategory));
@ -143,10 +139,6 @@ static cl::opt<bool, true> XPollyAnnotateAliasScopes(
namespace polly {
void initializePollyPasses(PassRegistry &Registry) {
#ifdef CLOOG_FOUND
initializeCloogInfoPass(Registry);
initializeCodeGenerationPass(Registry);
#endif
initializeIslCodeGenerationPass(Registry);
initializeCodePreparationPass(Registry);
initializeDeadCodeElimPass(Registry);
@ -189,9 +181,7 @@ void initializePollyPasses(PassRegistry &Registry) {
/// provided to analyze the run and compile time changes caused by the
/// scheduling optimizer.
///
/// Polly supports both CLooG (http://www.cloog.org) as well as the isl internal
/// code generator. For the moment, the CLooG code generator is enabled by
/// default.
/// Polly supports the isl internal code generator.
static void registerPollyPasses(llvm::PassManagerBase &PM) {
registerCanonicalicationPasses(PM);
@ -231,16 +221,6 @@ static void registerPollyPasses(llvm::PassManagerBase &PM) {
PM.add(polly::createJSONExporterPass());
switch (PollyCodeGenChoice) {
#ifdef CLOOG_FOUND
case CODEGEN_CLOOG:
PM.add(polly::createCodeGenerationPass());
if (PollyVectorizerChoice == VECTORIZER_BB) {
VectorizeConfig C;
C.FastDep = true;
PM.add(createBBVectorizePass(C));
}
break;
#endif
case CODEGEN_ISL:
PM.add(polly::createIslCodeGenerationPass());
break;

View File

@ -14,7 +14,6 @@
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/Cloog.h"
#include "polly/ScopDetection.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/DominanceFrontier.h"
@ -505,9 +504,6 @@ void IndependentBlocks::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<ScalarEvolution>();
AU.addRequired<ScopDetection>();
AU.addPreserved<ScopDetection>();
#ifdef CLOOG_FOUND
AU.addPreserved<CloogInfo>();
#endif
}
bool IndependentBlocks::runOnFunction(llvm::Function &F) {

View File

@ -1,19 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
define void @init_array() nounwind {
entry:
br label %for.cond
for.cond: ; preds = %for.cond1, %entry
%indvar1 = phi i64 [ %indvar.next2, %for.cond1 ], [ 0, %entry ] ; <i64> [#uses=1]
br i1 false, label %for.cond1, label %for.end32
for.cond1: ; preds = %for.cond
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1]
br label %for.cond
for.end32: ; preds = %for.cond
ret void
}

View File

@ -1,43 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
; RUN: opt %loadPolly -polly-detect -analyze < %s | not FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-portbld-freebsd8.0"
define void @MAIN__() nounwind {
entry:
br i1 undef, label %bb6.preheader, label %bb3
bb3: ; preds = %bb3, %entry
br i1 undef, label %bb6.preheader, label %bb3
bb6.preheader: ; preds = %bb3, %entry
br i1 undef, label %bb11, label %bb9.preheader
bb9.preheader: ; preds = %bb6.preheader
br label %bb11
bb11: ; preds = %bb9.preheader, %bb6.preheader
br label %bb15
bb15: ; preds = %bb15, %bb11
br i1 undef, label %bb26.loopexit, label %bb15
bb26.loopexit: ; preds = %bb15
br i1 undef, label %bb31, label %bb29.preheader
bb29.preheader: ; preds = %bb26.loopexit
br label %bb29
bb29: ; preds = %bb29, %bb29.preheader
%indvar47 = phi i32 [ 0, %bb29.preheader ], [ %indvar.next48, %bb29 ] ; <i32> [#uses=1]
%indvar.next48 = add i32 %indvar47, 1 ; <i32> [#uses=2]
%exitcond50 = icmp eq i32 %indvar.next48, undef ; <i1> [#uses=1]
br i1 %exitcond50, label %bb31, label %bb29
bb31: ; preds = %bb29, %bb26.loopexit
%errtot.3 = phi float [ undef, %bb26.loopexit ], [ undef, %bb29 ] ; <float> [#uses=0]
ret void
}
; CHECK: SCOP:

View File

@ -1,27 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
define void @clause_SetSplitField(i32 %Length) nounwind inlinehint {
entry:
br i1 undef, label %bb1, label %bb6
bb1: ; preds = %entry
unreachable
bb6: ; preds = %entry
%tmp = zext i32 %Length to i64 ; <i64> [#uses=1]
br label %bb8
bb7: ; preds = %bb8
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %bb8
bb8: ; preds = %bb7, %bb6
%indvar = phi i64 [ %indvar.next, %bb7 ], [ 0, %bb6 ] ; <i64> [#uses=2]
%exitcond = icmp ne i64 %indvar, %tmp ; <i1> [#uses=1]
br i1 %exitcond, label %bb7, label %return
return: ; preds = %bb8
ret void
}

View File

@ -1,115 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@win193 = external global [4 x [36 x double]], align 32 ; <[4 x [36 x double]]*> [#uses=3]
@sb_sample = external global [2 x [2 x [18 x [32 x double]]]], align 32 ; <[2 x [2 x [18 x [32 x double]]]]*> [#uses=2]
define void @mdct_sub48() nounwind {
entry:
br i1 undef, label %bb, label %bb54
bb: ; preds = %entry
br label %bb54
bb3: ; preds = %bb50
br label %bb8
bb4: ; preds = %bb8
br label %bb8
bb8: ; preds = %bb4, %bb3
br i1 undef, label %bb4, label %bb9
bb9: ; preds = %bb8
br label %bb48
bb25: ; preds = %bb48
br i1 false, label %bb26, label %bb27
bb26: ; preds = %bb48, %bb25
br label %bb37
bb27: ; preds = %bb25
br i1 undef, label %bb32, label %bb35
bb32: ; preds = %bb27
br label %bb37
bb34: ; preds = %bb35
%0 = getelementptr inbounds [36 x double]* undef, i64 0, i64 0 ; <double*> [#uses=0]
%1 = getelementptr inbounds [18 x [32 x double]]* undef, i64 0, i64 0 ; <[32 x double]*> [#uses=1]
%2 = getelementptr inbounds [32 x double]* %1, i64 0, i64 0 ; <double*> [#uses=0]
%3 = getelementptr inbounds [36 x double]* undef, i64 0, i64 0 ; <double*> [#uses=0]
%4 = sub nsw i32 17, %k.4 ; <i32> [#uses=1]
%5 = getelementptr inbounds [2 x [2 x [18 x [32 x double]]]]* @sb_sample, i64 0, i64 0 ; <[2 x [18 x [32 x double]]]*> [#uses=1]
%6 = getelementptr inbounds [2 x [18 x [32 x double]]]* %5, i64 0, i64 0 ; <[18 x [32 x double]]*> [#uses=1]
%7 = sext i32 %4 to i64 ; <i64> [#uses=1]
%8 = getelementptr inbounds [18 x [32 x double]]* %6, i64 0, i64 %7 ; <[32 x double]*> [#uses=1]
%9 = getelementptr inbounds [32 x double]* %8, i64 0, i64 0 ; <double*> [#uses=1]
%10 = load double* %9, align 8 ; <double> [#uses=0]
%11 = fsub double 0.000000e+00, undef ; <double> [#uses=1]
%12 = getelementptr inbounds double* getelementptr inbounds ([4 x [36 x double]]* @win193, i64 0, i64 2, i64 4), i64 0 ; <double*> [#uses=1]
store double %11, double* %12, align 8
%13 = add nsw i32 %k.4, 9 ; <i32> [#uses=1]
%14 = add nsw i32 %k.4, 18 ; <i32> [#uses=1]
%15 = getelementptr inbounds [4 x [36 x double]]* @win193, i64 0, i64 0 ; <[36 x double]*> [#uses=1]
%16 = sext i32 %14 to i64 ; <i64> [#uses=1]
%17 = getelementptr inbounds [36 x double]* %15, i64 0, i64 %16 ; <double*> [#uses=1]
%18 = load double* %17, align 8 ; <double> [#uses=0]
%19 = sext i32 %k.4 to i64 ; <i64> [#uses=1]
%20 = getelementptr inbounds [18 x [32 x double]]* undef, i64 0, i64 %19 ; <[32 x double]*> [#uses=1]
%21 = sext i32 %band.2 to i64 ; <i64> [#uses=1]
%22 = getelementptr inbounds [32 x double]* %20, i64 0, i64 %21 ; <double*> [#uses=1]
%23 = load double* %22, align 8 ; <double> [#uses=0]
%24 = sext i32 %39 to i64 ; <i64> [#uses=1]
%25 = getelementptr inbounds [4 x [36 x double]]* @win193, i64 0, i64 %24 ; <[36 x double]*> [#uses=1]
%26 = getelementptr inbounds [36 x double]* %25, i64 0, i64 0 ; <double*> [#uses=1]
%27 = load double* %26, align 8 ; <double> [#uses=0]
%28 = sub nsw i32 17, %k.4 ; <i32> [#uses=1]
%29 = getelementptr inbounds [2 x [2 x [18 x [32 x double]]]]* @sb_sample, i64 0, i64 0 ; <[2 x [18 x [32 x double]]]*> [#uses=1]
%30 = getelementptr inbounds [2 x [18 x [32 x double]]]* %29, i64 0, i64 0 ; <[18 x [32 x double]]*> [#uses=1]
%31 = sext i32 %28 to i64 ; <i64> [#uses=1]
%32 = getelementptr inbounds [18 x [32 x double]]* %30, i64 0, i64 %31 ; <[32 x double]*> [#uses=1]
%33 = getelementptr inbounds [32 x double]* %32, i64 0, i64 0 ; <double*> [#uses=1]
%34 = load double* %33, align 8 ; <double> [#uses=0]
%35 = sext i32 %13 to i64 ; <i64> [#uses=1]
%36 = getelementptr inbounds double* getelementptr inbounds ([4 x [36 x double]]* @win193, i64 0, i64 2, i64 4), i64 %35 ; <double*> [#uses=1]
store double 0.000000e+00, double* %36, align 8
%37 = sub nsw i32 %k.4, 1 ; <i32> [#uses=1]
br label %bb35
bb35: ; preds = %bb34, %bb27
%k.4 = phi i32 [ %37, %bb34 ], [ 8, %bb27 ] ; <i32> [#uses=6]
br i1 undef, label %bb34, label %bb36
bb36: ; preds = %bb35
unreachable
bb37: ; preds = %bb32, %bb26
%38 = add nsw i32 %band.2, 1 ; <i32> [#uses=1]
br label %bb48
bb48: ; preds = %bb37, %bb9
%band.2 = phi i32 [ %38, %bb37 ], [ 0, %bb9 ] ; <i32> [#uses=2]
%39 = load i32* null, align 8 ; <i32> [#uses=1]
br i1 undef, label %bb26, label %bb25
bb50: ; preds = %bb54
br i1 undef, label %bb3, label %bb51
bb51: ; preds = %bb50
br i1 undef, label %bb52, label %bb53
bb52: ; preds = %bb51
unreachable
bb53: ; preds = %bb51
br label %bb54
bb54: ; preds = %bb53, %bb, %entry
br i1 undef, label %bb50, label %return
return: ; preds = %bb54
ret void
}

View File

@ -1,28 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
; @init_array: reduced two-level loop nest. The outer header %bb1 branches on
; undef; the inner header %bb2 branches on constant false, so the edge into
; %bb3 (the only block containing a store) is never taken.
define void @init_array() nounwind {
bb:
br label %bb1
bb1: ; preds = %bb4, %bb
br i1 undef, label %bb2, label %bb5
bb2: ; preds = %bb3, %bb1
%indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb1 ] ; <i64> [#uses=1]
%tmp3 = trunc i64 undef to i32 ; <i32> [#uses=1]
br i1 false, label %bb3, label %bb4
bb3: ; preds = %bb2
; %tmp is dead (no uses); the remainder is taken of a value truncated from undef.
%tmp = srem i32 %tmp3, 1024 ; <i32> [#uses=0]
store double undef, double* undef
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %bb2
bb4: ; preds = %bb2
br label %bb1
bb5: ; preds = %bb1
ret void
}

View File

@ -1,34 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @fft_float: %bb18 branches on constant false into %bb19, which converts
; %NumSamples to double (%a) before entering the %bb21/%bb20 loop. The loop
; body divides by %a, so %a is a value defined outside the loop and used in it.
define void @fft_float(i32 %NumSamples) nounwind {
br label %bb18
bb18: ; preds = %0 (the unnamed entry block; no %bb17 exists in this function)
br i1 false, label %bb19, label %bb22
bb19: ; preds = %bb18
%a = uitofp i32 %NumSamples to double ; <double> [#uses=1]
br label %bb21
bb20: ; preds = %bb21
%1 = load float* undef, align 4 ; <float> [#uses=0]
%2 = fpext float undef to double ; <double> [#uses=1]
%3 = fdiv double %2, %a ; <double> [#uses=0]
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %bb21
bb21: ; preds = %bb20, %bb19
%indvar = phi i64 [ %indvar.next, %bb20 ], [ 0, %bb19 ] ; <i64> [#uses=1]
br i1 false, label %bb20, label %bb22.loopexit
bb22.loopexit: ; preds = %bb21
br label %bb22
bb22: ; preds = %bb22.loopexit, %bb18
br label %return
return: ; preds = %bb22
ret void
}

View File

@ -1,34 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @luaD_callhook: loop-free CFG. %0 is computed in %bb1 and only used in %bb6,
; i.e. after the %bb2/%bb3 diamond has merged again in %bb4.
define hidden void @luaD_callhook() nounwind {
entry:
br i1 undef, label %bb, label %return
bb: ; preds = %entry
br i1 undef, label %bb1, label %return
bb1: ; preds = %bb
%0 = sub nsw i64 undef, undef ; <i64> [#uses=1]
br i1 false, label %bb2, label %bb3
bb2: ; preds = %bb1
br label %bb4
bb3: ; preds = %bb1
br label %bb4
bb4: ; preds = %bb3, %bb2
br i1 undef, label %bb5, label %bb6
bb5: ; preds = %bb4
unreachable
bb6: ; preds = %bb4
; The only use of %0, two merges below its definition.
%1 = getelementptr inbounds i8* undef, i64 %0 ; <i8*> [#uses=0]
ret void
return: ; preds = %bb, %entry
ret void
}

View File

@ -1,39 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @matrixTranspose: triangular loop nest. The outer induction variable %i.0
; starts at 0 and iterates while %i.0 <= 50; the inner variable %j.0 starts at
; %i.0 + 1 and iterates while %j.0 <= 50, loading A[%j.0] on each iteration.
define void @matrixTranspose(double** %A) nounwind {
entry:
br label %bb4
bb: ; preds = %bb4
; Inner loop start value: i + 1.
%0 = add nsw i32 %i.0, 1 ; <i32> [#uses=1]
br label %bb2
bb1: ; preds = %bb2
; %1..%3 are identical, unused address computations left over from reduction.
%1 = getelementptr inbounds double** %A, i64 0 ; <double**> [#uses=0]
%2 = getelementptr inbounds double** %A, i64 0 ; <double**> [#uses=0]
%3 = getelementptr inbounds double** %A, i64 0 ; <double**> [#uses=0]
%4 = sext i32 %j.0 to i64 ; <i64> [#uses=1]
%5 = getelementptr inbounds double** %A, i64 %4 ; <double**> [#uses=1]
%6 = load double** %5, align 8 ; <double*> [#uses=0]
%7 = add nsw i32 %j.0, 1 ; <i32> [#uses=1]
br label %bb2
bb2: ; preds = %bb1, %bb
%j.0 = phi i32 [ %0, %bb ], [ %7, %bb1 ] ; <i32> [#uses=3]
%8 = icmp sle i32 %j.0, 50 ; <i1> [#uses=1]
br i1 %8, label %bb1, label %bb3
bb3: ; preds = %bb2
%9 = add nsw i32 %i.0, 1 ; <i32> [#uses=1]
br label %bb4
bb4: ; preds = %bb3, %entry
%i.0 = phi i32 [ 0, %entry ], [ %9, %bb3 ] ; <i32> [#uses=3]
%10 = icmp sle i32 %i.0, 50 ; <i1> [#uses=1]
br i1 %10, label %bb, label %return
return: ; preds = %bb4
ret void
}

View File

@ -1,35 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -verify-dom-info -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @getNonAffNeighbour: loop-free tree of conditional branches with no
; instructions other than terminators. All five leaves (%bb1, %bb3, %bb4, %bb7,
; %bb9) jump to the single exit block %bb16.
define void @getNonAffNeighbour() nounwind {
entry:
br i1 undef, label %bb, label %bb6
bb: ; preds = %entry
br i1 false, label %bb1, label %bb2
bb1: ; preds = %bb
br label %bb16
bb2: ; preds = %bb
br i1 false, label %bb3, label %bb4
bb3: ; preds = %bb2
br label %bb16
bb4: ; preds = %bb2
br label %bb16
bb6: ; preds = %entry
br i1 false, label %bb7, label %bb9
bb7: ; preds = %bb6
br label %bb16
bb9: ; preds = %bb6
br label %bb16
bb16: ; preds = %bb9, %bb7, %bb4, %bb3, %bb1
ret void
}

View File

@ -1,28 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -verify-dom-info -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @intrapred_luma_16x16: the entry block is a switch on %predmode. Case 0 enters
; the loop header %bb25, whose back edge into %bb23 is guarded by constant
; false; case 1 and the default return immediately.
define void @intrapred_luma_16x16(i32 %predmode) nounwind {
entry:
switch i32 %predmode, label %bb81 [
i32 0, label %bb25
i32 1, label %bb26
]
bb23: ; preds = %bb25
%indvar.next95 = add i64 %indvar94, 1 ; <i64> [#uses=1]
br label %bb25
bb25: ; preds = %bb23, %entry
%indvar94 = phi i64 [ %indvar.next95, %bb23 ], [ 0, %entry ] ; <i64> [#uses=1]
br i1 false, label %bb23, label %return
bb26: ; preds = %entry
ret void
bb81: ; preds = %entry
ret void
return: ; preds = %bb25
ret void
}

View File

@ -1,97 +0,0 @@
; RUN: opt %loadPolly -polly-cloog -analyze < %s
;int bar1();
;int bar2();
;int bar3();
;int k;
;#define N 100
;int A[N];
;
;int main() {
; int i, j, z;
;
; __sync_synchronize();
; for (i = 0; i < N; i++) {
; if (i < 50)
; A[i] = 8;
; if (i < 4)
; A[i] = 9;
; if (i < 3)
; A[i] = 10;
; }
; __sync_synchronize();
;
; return A[z];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
@A = common global [100 x i32] zeroinitializer, align 16 ; <[100 x i32]*> [#uses=2]
@k = common global i32 0, align 4 ; <i32*> [#uses=0]
; @main: one loop over %indvar (left when %indvar == 100) with three successive
; guarded stores to A[%indvar]: 8 if i < 50, 9 if i < 4, 10 if i < 3. The loop
; sits between two seq_cst fences. All blocks are unnamed; the "<label>:N"
; notes record their implicit numbers.
; NOTE(review): the expected output that follows this function names Stmt_4,
; Stmt_7 and Stmt_10, which appear to be derived from the numbers of the three
; storing blocks - keep the value numbering intact when editing.
define i32 @main() nounwind {
; <label>:0
fence seq_cst
br label %1
; <label>:1 ; preds = %12, %0
%indvar = phi i64 [ %indvar.next, %12 ], [ 0, %0 ] ; <i64> [#uses=4]
%scevgep = getelementptr [100 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=3]
%i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=3]
%exitcond = icmp ne i64 %indvar, 100 ; <i1> [#uses=1]
br i1 %exitcond, label %2, label %13
; <label>:2 ; preds = %1
%3 = icmp slt i32 %i.0, 50 ; <i1> [#uses=1]
br i1 %3, label %4, label %5
; <label>:4 ; preds = %2
store i32 8, i32* %scevgep
br label %5
; <label>:5 ; preds = %4, %2
%6 = icmp slt i32 %i.0, 4 ; <i1> [#uses=1]
br i1 %6, label %7, label %8
; <label>:7 ; preds = %5
store i32 9, i32* %scevgep
br label %8
; <label>:8 ; preds = %7, %5
%9 = icmp slt i32 %i.0, 3 ; <i1> [#uses=1]
br i1 %9, label %10, label %11
; <label>:10 ; preds = %8
store i32 10, i32* %scevgep
br label %11
; <label>:11 ; preds = %10, %8
br label %12
; <label>:12 ; preds = %11
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %1
; <label>:13 ; preds = %1
fence seq_cst
; The index is undef here; the C source above returns A[z] with z uninitialised.
%14 = sext i32 undef to i64 ; <i64> [#uses=1]
%15 = getelementptr inbounds i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0), i64 %14 ; <i32*> [#uses=1]
%16 = load i32* %15 ; <i32> [#uses=1]
ret i32 %16
}
; CHECK: for (c2=0;c2<=2;c2++) {
; CHECK: S0(c2);
; CHECK: S1(c2);
; CHECK: S2(c2);
; CHECK: }
; CHECK: S0(3);
; CHECK: S1(3);
; CHECK: for (c2=4;c2<=49;c2++) {
; CHECK: S0(c2);
; CHECK: }
; CHECK: S0: Stmt_4
; CHECK: S1: Stmt_7
; CHECK: S2: Stmt_10
; CHECK:

View File

@ -1,31 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @cfft2: a pointer value is passed through the stack slot %d.1.reg2mem
; (written in %bb20 and, conditionally, in %bb21; reloaded in %bb23). %bb34 is
; the loop header, reached from %entry, %bb23 and %bb29; the function returns
; through %bb2.
define void @cfft2([2 x float]* %x) nounwind {
entry:
%d.1.reg2mem = alloca [2 x float]* ; <[2 x float]**> [#uses=3]
br i1 undef, label %bb2, label %bb34
bb2: ; preds = %bb34, %entry
ret void
bb20: ; preds = %bb34
store [2 x float]* undef, [2 x float]** %d.1.reg2mem
br i1 false, label %bb21, label %bb23
bb21: ; preds = %bb20
; Overwrites the slot with an address derived from the argument %x.
%0 = getelementptr inbounds [2 x float]* %x, i64 undef ; <[2 x float]*> [#uses=1]
store [2 x float]* %0, [2 x float]** %d.1.reg2mem
br label %bb23
bb23: ; preds = %bb21, %bb20
%d.1.reload = load [2 x float]** %d.1.reg2mem ; <[2 x float]*> [#uses=1]
br i1 undef, label %bb29, label %bb34
bb29: ; preds = %bb23
%1 = getelementptr inbounds [2 x float]* %d.1.reload, i64 undef ; <[2 x float]*> [#uses=0]
br label %bb34
bb34: ; preds = %bb29, %bb23, %entry
br i1 undef, label %bb20, label %bb2
}

View File

@ -1,30 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -disable-output < %s
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @main: single counted loop (left when %indvar1 == 1024). The exit condition
; %exitcond is computed in the header %for.cond but consumed by the branch in
; the following block %a, so the compare and its branch live in different
; blocks.
define void @main() nounwind {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvar1 = phi i64 [ %indvar.next2, %for.inc ], [ 0, %entry ] ; <i64> [#uses=2]
%exitcond = icmp ne i64 %indvar1, 1024 ; <i1> [#uses=1]
br label %a
a: ; preds = %for.cond
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %a
br label %for.inc
for.inc: ; preds = %for.body
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1]
br label %for.cond
for.end: ; preds = %a
br label %for.cond5
for.cond5: ; preds = %for.end (no %for.inc17 exists in this reduced function)
ret void
}

View File

@ -1,22 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @compdecomp: loop-free. %i is loaded in the entry block, compared against 0
; (unsigned) in %bb37, and conditionally stored to the stack slot %max in %bb38.
; Both paths end in %bb39, which is unreachable - the function never returns.
define void @compdecomp() nounwind {
entry:
%max = alloca i64
%i = load i64* undef
br label %bb37
bb37: ; preds = %entry (no %bb36 or %bb28 exists in this reduced function)
%tmp = icmp ugt i64 %i, 0
br i1 %tmp, label %bb38, label %bb39
bb38: ; preds = %bb37
store i64 %i, i64* %max
br label %bb39
bb39: ; preds = %bb38, %bb37
unreachable
}

View File

@ -1,24 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @Reflection_coefficients: counted loop whose bound is only known at run time.
; %tmp5 = 8 - (value loaded in %bb20); the loop runs while %indvar != %tmp5 and
; stores 0 to r[0] on every iteration.
define void @Reflection_coefficients(i16* %r) nounwind {
bb20:
%indvar3.lcssa20.reload = load i64* undef
%tmp = mul i64 %indvar3.lcssa20.reload, -1
%tmp5 = add i64 %tmp, 8
br label %bb22
bb21: ; preds = %bb22
; The address does not depend on %indvar: index 0 on every iteration.
%r_addr.1.moved.to.bb21 = getelementptr i16* %r, i64 0
store i16 0, i16* %r_addr.1.moved.to.bb21, align 2
%indvar.next = add i64 %indvar, 1
br label %bb22
bb22: ; preds = %bb21, %bb20
%indvar = phi i64 [ %indvar.next, %bb21 ], [ 0, %bb20 ]
%exitcond = icmp ne i64 %indvar, %tmp5
br i1 %exitcond, label %bb21, label %return
return: ; preds = %bb22
ret void
}

View File

@ -1,38 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
; @CleanNet: a self-loop %bb7 followed by a second loop (%bb18/%bb13/%bb17).
; The second loop's value %i.1 is %indvar15 offset by %tmp18, the negation of a
; value loaded in %bb8 between the two loops; %i.1 is conditionally stored to
; the stack slot %firstVia.0.reg2mem.
define void @CleanNet() nounwind {
entry:
%firstVia.0.reg2mem = alloca i64
br label %bb7
bb7: ; preds = %bb7, %entry
br i1 undef, label %bb7, label %bb8
bb8: ; preds = %bb7
%indvar5.lcssa.reload = load i64* undef
%tmp17 = mul i64 %indvar5.lcssa.reload, -1
%tmp18 = add i64 0, %tmp17
br label %bb18
bb13: ; preds = %bb18
; Unsigned "less than 0" can never hold, so the %bb14 edge is never taken.
%0 = icmp ult i64 %i.1, 0
br i1 %0, label %bb14, label %bb17
bb14: ; preds = %bb13
store i64 %i.1, i64* %firstVia.0.reg2mem
br label %bb17
bb17: ; preds = %bb14, %bb13
%indvar.next16 = add i64 %indvar15, 1
br label %bb18
bb18: ; preds = %bb17, %bb8
%indvar15 = phi i64 [ %indvar.next16, %bb17 ], [ 0, %bb8 ]
%i.1 = add i64 %tmp18, %indvar15
br i1 undef, label %bb13, label %bb25
bb25: ; preds = %bb18
ret void
}

View File

@ -1,59 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @main: outer loop over %indvar15 (headed by unnamed block %0, left from
; %.loopexit when %indvar.next16 == 2048) containing two inner self-loops,
; %.lr.ph and %.preheader.us. The blocks are laid out out of execution order:
; the outer latch %.loopexit appears before the outer header %0.
; NOTE(review): the ".simregentry"/".simregexit" block names look like the
; output of a region-simplification step - confirm before relying on that.
define void @main() nounwind {
.split:
br label %0
.loopexit.loopexit: ; preds = %.preheader.us
br label %.loopexit.simregexit
.loopexit.simregexit: ; preds = %.loopexit.loopexit, %._crit_edge
br label %.loopexit
.loopexit: ; preds = %.loopexit.simregexit
%indvar.next16 = add i64 %indvar15, 1
%exitcond53 = icmp eq i64 %indvar.next16, 2048
br i1 %exitcond53, label %2, label %0
; <label>:0 ; preds = %.loopexit, %.split
%indvar15 = phi i64 [ 0, %.split ], [ %indvar.next16, %.loopexit ]
br label %.simregentry
.simregentry: ; preds = %0
; Single-entry phi with no uses.
%indvar15.ph = phi i64 [ %indvar15, %0 ]
%tmp67 = add i64 %indvar15, 1
%i.06 = trunc i64 %tmp67 to i32
%tmp25 = add i64 undef, 1
%1 = icmp slt i32 %i.06, 2048
br i1 %1, label %.lr.ph.preheader, label %._crit_edge.simregexit
.lr.ph.preheader: ; preds = %.simregentry
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
%indvar33 = phi i64 [ %indvar.next34, %.lr.ph ], [ 0, %.lr.ph.preheader ]
%indvar.next34 = add i64 %indvar33, 1
; Leaves the loop only once the incremented counter equals 0.
%exitcond40 = icmp eq i64 %indvar.next34, 0
br i1 %exitcond40, label %._crit_edge.loopexit, label %.lr.ph
._crit_edge.loopexit: ; preds = %.lr.ph
br label %._crit_edge.simregexit
._crit_edge.simregexit: ; preds = %.simregentry, %._crit_edge.loopexit
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.simregexit
br i1 false, label %.loopexit.simregexit, label %.preheader.us.preheader
.preheader.us.preheader: ; preds = %._crit_edge
br label %.preheader.us
.preheader.us: ; preds = %.preheader.us, %.preheader.us.preheader
; The compare result is unused; the branch below tests constant false instead.
%exitcond26.old = icmp eq i64 undef, %tmp25
br i1 false, label %.loopexit.loopexit, label %.preheader.us
; <label>:2 ; preds = %.loopexit
ret void
}

View File

@ -1,29 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; @main: outer loop over %indvar15 (left when %indvar.next16 == 2048) with an
; inner self-loop %.lr.ph. The inner bound %tmp39 is
; ((2046 - %indvar15) & 4294967295) + 1, i.e. it depends on the outer counter
; through a 32-bit mask. The edge into the inner loop is guarded by constant
; false.
define void @main() nounwind {
.split:
br label %0
.loopexit: ; preds = %.lr.ph, %0
%indvar.next16 = add i64 %indvar15, 1
%exitcond53 = icmp eq i64 %indvar.next16, 2048
br i1 %exitcond53, label %1, label %0
; <label>:0 ; preds = %.loopexit, %.split
%indvar15 = phi i64 [ 0, %.split ], [ %indvar.next16, %.loopexit ]
%tmp59 = sub i64 2046, %indvar15
%tmp38 = and i64 %tmp59, 4294967295
%tmp39 = add i64 %tmp38, 1
br i1 false, label %.lr.ph, label %.loopexit
.lr.ph: ; preds = %.lr.ph, %0
%indvar33 = phi i64 [ %indvar.next34, %.lr.ph ], [ 0, %0 ]
%indvar.next34 = add i64 %indvar33, 1
%exitcond40 = icmp eq i64 %indvar.next34, %tmp39
br i1 %exitcond40, label %.loopexit, label %.lr.ph
; <label>:1 ; preds = %.loopexit
ret void
}

View File

@ -1,20 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"
; @fixup_gotos: loop-free. Stores 1 to *%data unless the pointer %A is null;
; the branch condition is a pointer comparison rather than an integer one.
define void @fixup_gotos(i32* %A, i32* %data) nounwind {
entry:
br label %if
if:
%cond = icmp eq i32* %A, null
br i1 %cond, label %last, label %then
then:
store i32 1, i32* %data, align 4
br label %last
last:
ret void
}

View File

@ -1,27 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
; We just check that this compilation does not crash.
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
target triple = "hexagon-unknown-linux-gnu"
; @init: two consecutive self-loops. %for.body loops on undef; %for.body7
; counts %i.128 up from 0, stores undef to the stack slot %hi.129.reg2mem and
; has a constant-false back edge. The function ends in unreachable.
define void @init() nounwind {
entry:
%hi.129.reg2mem = alloca i64
br label %for.body
for.cond5.preheader: ; preds = %for.body
br label %for.body7
for.body: ; preds = %for.body, %entry
br i1 undef, label %for.body, label %for.cond5.preheader
for.body7: ; preds = %for.body7, %for.cond5.preheader
%i.128 = phi i64 [ 0, %for.cond5.preheader ], [ %inc17, %for.body7 ]
%inc17 = add nsw i64 %i.128, 1
store i64 undef, i64* %hi.129.reg2mem
br i1 false, label %for.body7, label %for.end18
for.end18: ; preds = %for.body7
unreachable
}

View File

@ -1,21 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -polly-vectorizer=polly < %s
; PR 19421
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; @extract_field: single self-loop storing undef to frame[%indvar]. The loop
; bound %.moved.to.if.end (zero-extension of %nb_planes) is computed inside the
; loop body on every iteration, although it only depends on the argument.
define void @extract_field(i32* %frame, i32 %nb_planes) {
entry:
br i1 undef, label %for.body, label %for.end
for.body: ; preds = %for.body, %entry
%indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
%arrayidx2.moved.to.if.end = getelementptr i32* %frame, i64 %indvar
%.moved.to.if.end = zext i32 %nb_planes to i64
store i32 undef, i32* %arrayidx2.moved.to.if.end
%indvar.next = add i64 %indvar, 1
%exitcond = icmp ne i64 %indvar.next, %.moved.to.if.end
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}

View File

@ -1,72 +0,0 @@
; REQUIRES: nvptx-registered-target
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed+gpu -enable-polly-gpgpu -polly-gpgpu-triple=nvptx64-unknown-unknown -polly-codegen < %s -S | FileCheck %s
;int A[1024];
;int gpu() {
; int i;
;
; for(i = 0; i < 1024; i++)
; A[i] = i*128 + 508;
;
; return 0;
;}
;
;int main() {
; int b = gpu();
; return 0;
;}
; ModuleID = '1d_parallel.s'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x i32] zeroinitializer, align 16
; Function Attrs: nounwind uwtable
; @gpu: IR for the loop in the C source above: A[i] = i*128 + 508 for
; i = 0..1023, written as a single self-loop in unnamed block %1. The product
; is computed in i64 and truncated to i32 before the store. Always returns 0.
define i32 @gpu() #0 {
br label %.split
.split: ; preds = %0
br label %1
; <label>:1 ; preds = %.split, %1
%indvar = phi i64 [ 0, %.split ], [ %indvar.next, %1 ]
%2 = mul i64 %indvar, 128
%3 = add i64 %2, 508
%4 = trunc i64 %3 to i32
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar
store i32 %4, i32* %scevgep, align 4
%indvar.next = add i64 %indvar, 1
%exitcond = icmp ne i64 %indvar.next, 1024
br i1 %exitcond, label %1, label %5
; <label>:5 ; preds = %1
ret i32 0
}
; Function Attrs: nounwind uwtable
; @main: calls @gpu once, ignores its result (%1 has no uses) and returns 0.
define i32 @main() #0 {
br label %.split
.split: ; preds = %0
%1 = tail call i32 @gpu()
ret i32 0
}
; CHECK: call void @polly_initDevice
; CHECK: call void @polly_getPTXModule
; CHECK: call void @polly_getPTXKernelEntry
; CHECK: call void @polly_allocateMemoryForHostAndDevice
; CHECK: call void @polly_setKernelParameters
; CHECK: call void @polly_startTimerByCudaEvent
; CHECK: call void @polly_launchKernel
; CHECK: call void @polly_copyFromDeviceToHost
; CHECK: call void @polly_stopTimerByCudaEvent
; CHECK: call void @polly_cleanupGPGPUResources
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = metadata !{metadata !"clang version 3.5.0 "}

View File

@ -1,83 +0,0 @@
; REQUIRES: nvptx-registered-target
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed+gpu -enable-polly-gpgpu -polly-gpgpu-triple=nvptx64-unknown-unknown -polly-codegen < %s -S | FileCheck %s
;int A[128][128];
;
;int gpu_pure() {
; int i,j;
;
; for(i = 0; i < 128; i++)
; for(j = 0; j < 128; j++)
; A[i][j] = i*128 + j;
;
; return 0;
;}
;
;int main() {
; int b = gpu_pure();
; return 0;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [128 x [128 x i32]] zeroinitializer, align 16
; @gpu_pure: IR for the 128x128 loop nest in the C source above. The stored
; value is (i << 7) + j, computed in i64 and truncated to i32; both loop exit
; tests compare the truncated 32-bit counter against 128. Always returns 0.
define i32 @gpu_pure() nounwind uwtable {
entry:
br label %for.cond
for.cond: ; preds = %for.inc6, %entry
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc6 ], [ 0, %entry ]
%lftr.wideiv5 = trunc i64 %indvars.iv2 to i32
%exitcond6 = icmp ne i32 %lftr.wideiv5, 128
br i1 %exitcond6, label %for.body, label %for.end8
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ]
%lftr.wideiv = trunc i64 %indvars.iv to i32
%exitcond = icmp ne i32 %lftr.wideiv, 128
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
; i*128 expressed as a left shift by 7.
%tmp = shl nsw i64 %indvars.iv2, 7
%tmp7 = add nsw i64 %tmp, %indvars.iv
%arrayidx5 = getelementptr inbounds [128 x [128 x i32]]* @A, i64 0, i64 %indvars.iv2, i64 %indvars.iv
%tmp8 = trunc i64 %tmp7 to i32
store i32 %tmp8, i32* %arrayidx5, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%indvars.iv.next = add i64 %indvars.iv, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc6
for.inc6: ; preds = %for.end
%indvars.iv.next3 = add i64 %indvars.iv2, 1
br label %for.cond
for.end8: ; preds = %for.cond
ret i32 0
}
; @main: calls @gpu_pure once, ignores its result (%call has no uses) and
; returns 0.
define i32 @main() nounwind uwtable {
entry:
%call = call i32 @gpu_pure()
ret i32 0
}
; CHECK: call void @polly_initDevice
; CHECK: call void @polly_getPTXModule
; CHECK: call void @polly_getPTXKernelEntry
; CHECK: call void @polly_allocateMemoryForHostAndDevice
; CHECK: call void @polly_setKernelParameters
; CHECK: call void @polly_startTimerByCudaEvent
; CHECK: call void @polly_launchKernel
; CHECK: call void @polly_copyFromDeviceToHost
; CHECK: call void @polly_stopTimerByCudaEvent
; CHECK: call void @polly_cleanupGPGPUResources

View File

@ -1,109 +0,0 @@
; REQUIRES: nvptx-registered-target
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed+gpu -enable-polly-gpgpu -polly-gpgpu-triple=nvptx64-unknown-unknown -polly-codegen < %s -S | FileCheck %s
; This test case is currently broken.
; XFAIL: *
;int A[128][128];
;
;int gpu_no_pure() {
; int i,j,k;
;
; for(i = 0; i < 128; i++)
; for(j = 0; j < 128; j++)
; for(k = 0; k < 256; k++)
; A[i][j] += i*123/(k+1)+5-j*k-123;
;
; return 0;
;}
;
;int main() {
; int b = gpu_no_pure();
; return 0;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [128 x [128 x i32]] zeroinitializer, align 16
; @gpu_no_pure: IR for the three-deep loop nest in the C source above
; (i, j < 128; k < 256). The innermost body %for.body6 reads A[i][j], adds
; i*123/(k+1) + 5 - j*k - 123 and writes the sum back to the same element, so
; every k iteration reads and writes one location. Always returns 0.
define i32 @gpu_no_pure() nounwind uwtable {
entry:
br label %for.cond
for.cond: ; preds = %for.inc16, %entry
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc16 ], [ 0, %entry ]
%lftr.wideiv5 = trunc i64 %indvars.iv2 to i32
%exitcond6 = icmp ne i32 %lftr.wideiv5, 128
br i1 %exitcond6, label %for.body, label %for.end18
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc13, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc13 ], [ 0, %for.body ]
%lftr.wideiv = trunc i64 %indvars.iv to i32
%exitcond1 = icmp ne i32 %lftr.wideiv, 128
br i1 %exitcond1, label %for.body3, label %for.end15
for.body3: ; preds = %for.cond1
br label %for.cond4
for.cond4: ; preds = %for.inc, %for.body3
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %k.0, 256
br i1 %exitcond, label %for.body6, label %for.end
for.body6: ; preds = %for.cond4
; i*123
%tmp = mul nsw i64 %indvars.iv2, 123
; k+1, the divisor (never zero for k in 0..255)
%add = add nsw i32 %k.0, 1
%tmp7 = trunc i64 %tmp to i32
%div = sdiv i32 %tmp7, %add
%add7 = add nsw i32 %div, 5
%tmp8 = trunc i64 %indvars.iv to i32
; j*k
%mul8 = mul nsw i32 %tmp8, %k.0
%sub = sub nsw i32 %add7, %mul8
%sub9 = add nsw i32 %sub, -123
%arrayidx11 = getelementptr inbounds [128 x [128 x i32]]* @A, i64 0, i64 %indvars.iv2, i64 %indvars.iv
%tmp9 = load i32* %arrayidx11, align 4
%add12 = add nsw i32 %tmp9, %sub9
store i32 %add12, i32* %arrayidx11, align 4
br label %for.inc
for.inc: ; preds = %for.body6
%inc = add nsw i32 %k.0, 1
br label %for.cond4
for.end: ; preds = %for.cond4
br label %for.inc13
for.inc13: ; preds = %for.end
%indvars.iv.next = add i64 %indvars.iv, 1
br label %for.cond1
for.end15: ; preds = %for.cond1
br label %for.inc16
for.inc16: ; preds = %for.end15
%indvars.iv.next3 = add i64 %indvars.iv2, 1
br label %for.cond
for.end18: ; preds = %for.cond
ret i32 0
}
; @main: calls @gpu_no_pure once, ignores its result (%call has no uses) and
; returns 0.
define i32 @main() nounwind uwtable {
entry:
%call = call i32 @gpu_no_pure()
ret i32 0
}
; CHECK: call void @polly_initDevice
; CHECK: call void @polly_getPTXModule
; CHECK: call void @polly_getPTXKernelEntry
; CHECK: call void @polly_allocateMemoryForHostAndDevice
; CHECK: call void @polly_setKernelParameters
; CHECK: call void @polly_startTimerByCudaEvent
; CHECK: call void @polly_launchKernel
; CHECK: call void @polly_copyFromDeviceToHost
; CHECK: call void @polly_stopTimerByCudaEvent
; CHECK: call void @polly_cleanupGPGPUResources

View File

@ -1,17 +0,0 @@
{
"context" : "{ : }",
"name" : "%1 => %5",
"statements" : [
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_1[i0] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_1[i0] : i0 >= 0 and i0 <= 1023 }",
"name" : "Stmt_1",
"schedule" : "{ Stmt_1[i0] -> scattering[0, i0, 0] }"
}
]
}

View File

@ -1,17 +0,0 @@
{
"context" : "{ : }",
"name" : "%1 => %5",
"statements" : [
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_1[i0] -> MemRef_A[i0] }"
}
],
"domain" : "{ Stmt_1[i0] : i0 >= 0 and i0 <= 1023 }",
"name" : "Stmt_1",
"schedule" : "{ Stmt_1[i0] -> scattering[0, o0, o1, o2, o3] : o0 >= 0 and o0 <= 1 and o1 >= 0 and o1 <= 1 and o2 >= 0 and o2 <= 15 and o3 >= 0 and o3 <= 15 and i0 = 512o0 + 256o1 + 16o2 + o3 }"
}
]
}

View File

@ -1,21 +0,0 @@
{
"context" : "{ : }",
"name" : "for.cond => for.end18",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[128i0 + i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[128i0 + i1] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 127 and i1 >= 0 and i1 <= 127 and i2 >= 0 and i2 <= 255 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }"
}
]
}

View File

@ -1,21 +0,0 @@
{
"context" : "{ : }",
"name" : "for.cond => for.end18",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[128i0 + i1] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_A[128i0 + i1] }"
}
],
"domain" : "{ Stmt_for_body6[i0, i1, i2] : i0 >= 0 and i0 <= 127 and i1 >= 0 and i1 <= 127 and i2 >= 0 and i2 <= 255 }",
"name" : "Stmt_for_body6",
"schedule" : "{ Stmt_for_body6[i0, i1, i2] -> scattering[0, o0, o1, o2, o3, i2, 0] : o0 >= 0 and o0 <= 7 and o1 >= 0 and o1 <= 15 and o2 >= 0 and o2 <= 7 and o3 >= 0 and o3 <= 15 and i0 = 16o0 + o1 and i1 = 16o2 + o3 }"
}
]
}

View File

@ -1,17 +0,0 @@
{
"context" : "{ : }",
"name" : "for.cond => for.end8",
"statements" : [
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_A[128i0 + i1] }"
}
],
"domain" : "{ Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 127 and i1 >= 0 and i1 <= 127 }",
"name" : "Stmt_for_body3",
"schedule" : "{ Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0] }"
}
]
}

View File

@ -1,17 +0,0 @@
{
"context" : "{ : }",
"name" : "for.cond => for.end8",
"statements" : [
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_A[128i0 + i1] }"
}
],
"domain" : "{ Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 127 and i1 >= 0 and i1 <= 127 }",
"name" : "Stmt_for_body3",
"schedule" : "{ Stmt_for_body3[i0, i1] -> scattering[0, o0, o1, o2, o3]: o0 >= 0 and o0 <= 7 and o1 >= 0 and o1 <= 15 and o2 >= 0 and o2 <= 7 and o3 >= 0 and o3 <= 15 and i0 = 16o0 + o1 and i1 = 16o2 + o3 }"
}
]
}

View File

@ -1,5 +0,0 @@
# Per-directory lit configuration: only `.ll` files here are treated as tests.
config.suffixes = ['.ll']

# The tests in this directory are marked unsupported unless the root
# configuration's `enable_gpgpu_codegen` value is one of the accepted
# "enabled" spellings below.
gpgpu = config.root.enable_gpgpu_codegen
if gpgpu not in ['TRUE', 'true', 'yes', 'YES']:
    config.unsupported = True

View File

@ -1,44 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-openmp < %s -S | FileCheck %s
;
;void f(int * restrict A, int * restrict B, int n) {
; for (int i = 0; i < n; i++)
; A[i] = B[i] * 2;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @f: IR for the C source above: A[i] = B[i] * 2 while i < n, with the multiply
; expressed as a left shift by 1. Both pointers are noalias; the bound %n is a
; function argument, compared against the counter truncated to i32.
define void @f(i32* noalias %A, i32* noalias %B, i32 %n) nounwind uwtable {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%tmp = trunc i64 %indvars.iv to i32
%cmp = icmp slt i32 %tmp, %n
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv
%tmp1 = load i32* %arrayidx, align 4
%mul = shl nsw i32 %tmp1, 1
%arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
store i32 %mul, i32* %arrayidx2, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
; CHECK: %polly.par.userContext[[NO:[0-9]*]] = bitcast i8* %polly.par.userContext to { i32, i32*, i32* }*
; CHECK: %0 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 0
; CHECK: %1 = load i32* %0
; CHECK: %2 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 1
; CHECK: %3 = load i32** %2
; CHECK: %4 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 2
; CHECK: %5 = load i32** %4

View File

@ -1,40 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -enable-polly-openmp -disable-verify -S < %s | FileCheck %s
;#define N 10
;
;void foo() {
; float A[N];
;
; for (int i=0; i < N; i++)
; A[i] = 10;
;
; return;
;}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
; @foo: IR for the C source above: stores 10.0 to A[i] for i = 0..9, where A is
; a function-local array allocated in the entry block (an alloca, not a
; global).
define void @foo() nounwind {
entry:
%A = alloca [10 x float], align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%cmp = icmp slt i32 %i.0, 10
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds [10 x float]* %A, i32 0, i32 %i.0
store float 1.000000e+01, float* %arrayidx
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
; CHECK: store [10 x float]* %A, [10 x float]**

View File

@ -1,56 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -analyze -S < %s | FileCheck %s -check-prefix=CLOOG
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-openmp -S < %s | FileCheck %s
;
; Test case that checks that after the parallel loop on j the value for i is
; taken from the right temporary (in particular, _not_ the temporary used for i
; in the OpenMP subfunction for the loop on j).
;
; void f(long * restrict A) {
; long i, j;
; for (i=0; i<100; ++i) {
; #pragma omp parallel
; for (j=0; j<100; ++j)
; A[j] += i;
; A[i] = 42;
; }
; }
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; @f: IR for the C source above. The inner loop %for.j adds the outer counter
; %i to A[j] for j = 0..99; after it, %for.end stores 42 to A[i] and advances
; %i, for i = 0..99. %for.end thus uses %i after the inner loop has finished.
; NOTE(review): the value names look swapped relative to their use -
; %i.arrayidx is indexed by %j and %j.arrayidx by %i. Names only; left as is.
define void @f(i64* noalias nocapture %A) {
entry:
br label %for.i
for.i:
%i = phi i64 [ %i.next, %for.end ], [ 0, %entry ]
br label %for.j
for.j: ; preds = %for.j, %for.i
%j = phi i64 [ 0, %for.i ], [ %j.next, %for.j ]
%i.arrayidx = getelementptr inbounds i64* %A, i64 %j
%load = load i64* %i.arrayidx
%add = add nsw i64 %load, %i
store i64 %add, i64* %i.arrayidx
%j.next = add i64 %j, 1
%j.exitcond = icmp eq i64 %j.next, 100
br i1 %j.exitcond, label %for.end, label %for.j
for.end: ; preds = %for.j
%j.arrayidx = getelementptr inbounds i64* %A, i64 %i
store i64 42, i64* %j.arrayidx
%i.next = add i64 %i, 1
%i.exitcond = icmp eq i64 %i.next, 100
br i1 %i.exitcond, label %end, label %for.i
end: ; preds = %for.end, %entry
ret void
}
; CLOOG: for (c2=0;c2<=99;c2++) {
; CLOOG: for (c4=0;c4<=99;c4++) {
; CLOOG: Stmt_for_j(c2,c4);
; CLOOG: }
; CLOOG: Stmt_for_end(c2);
; CLOOG: }
; CHECK: @f.polly.subfn

View File

@ -1,34 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -enable-polly-openmp -S < %s | FileCheck %s
;
; 'arg' has the same type as A[i], i.e., the function argument has to be
; copied to the function generated for the loop.
;
; float A[100];
; void copy_in_test(float arg) {
; long i;
; for (i=0; i<100; ++i)
; A[i] = arg;
; }
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x float] zeroinitializer, align 16
; Stores the float argument %arg into every element A[0..99] of the global @A.
; %arg is defined outside the loop and used inside it.
define void @copy_in_test(float %arg) nounwind uwtable {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
; Loop counter, 0 on entry and incremented by one per iteration.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; A[i] = arg
%arrayidx = getelementptr inbounds [100 x float]* @A, i64 0, i64 %indvars.iv
store float %arg, float* %arrayidx
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit after the iteration with i == 99.
%exitcond = icmp eq i64 %indvars.iv.next, 100
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; CHECK: %polly.par.userContext = alloca { float }

View File

@ -1,35 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -enable-polly-openmp -S < %s | FileCheck %s
;
; 'arg' is converted to float before the loop, so the corresponding temporary
; has to be copied to the function generated for the loop.
;
; float A[100];
; void copy_in_test(long arg) {
; long i;
; for (i=0; i<100; ++i)
; A[i] = arg;
; }
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [100 x float] zeroinitializer, align 16
; Converts the i64 argument to float once in the entry block and stores that
; converted value into every element A[0..99] of the global @A.
define void @copy_in_test(i64 %arg) nounwind uwtable {
entry:
; The conversion happens before the loop, so the loop body uses the temporary
; %conv rather than the function argument itself.
%conv = sitofp i64 %arg to float
br label %for.body
for.body: ; preds = %for.body, %entry
; Loop counter, 0 on entry and incremented by one per iteration.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; A[i] = (float)arg
%arrayidx = getelementptr inbounds [100 x float]* @A, i64 0, i64 %indvars.iv
store float %conv, float* %arrayidx
%indvars.iv.next = add i64 %indvars.iv, 1
; Exit after the iteration with i == 99.
%exitcond = icmp eq i64 %indvars.iv.next, 100
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; CHECK: %polly.par.userContext = alloca { float }

View File

@ -1,56 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -enable-polly-openmp -S < %s | FileCheck %s
;#define N 10
;
;void foo() {
; float A[N];
; int i = 0;
;
; for (i=0; i < N; i++)
; A[i] = 10;
;
; return;
;}
;
;
;int main()
;{
; foo();
;}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
; Fills a function-local array of 10 floats with the value 10.0.
define void @foo() nounwind {
entry:
; The array lives on the stack of @foo; its address is used inside the loop.
%A = alloca [10 x float], align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop counter i, 0 on entry and %inc on the back edge.
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Continue while i < 10.
%cmp = icmp slt i32 %i.0, 10
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[i] = 10.0
%arrayidx = getelementptr inbounds [10 x float]* %A, i32 0, i32 %i.0
store float 1.000000e+01, float* %arrayidx
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
; Entry point: calls @foo once and returns 0.
define i32 @main() nounwind {
entry:
call void @foo()
ret i32 0
}
; CHECK: %[[V:[._a-zA-Z0-9]+]] = getelementptr inbounds { [10 x float]* }* %polly.par.userContext, i32 0, i32 0
; CHECK: store [10 x float]* %A, [10 x float]** %[[V]]
; CHECK: inbounds { [10 x float]* }* %polly.par.userContext{{[0-9]*}}, i32 0, i32 0
; CHECK: load [10 x float]**

View File

@ -1,50 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -enable-polly-openmp -analyze < %s 2>&1 | FileCheck %s
;#define N 500000
;float A[N];
;int main() {
; int j, k;
;
; for(k = 0; k < N; k++)
; for (j = 0; j <= N; j++)
; A[j] = k;
;
; return 0;
;}
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
@A = common global [500000 x float] zeroinitializer, align 4
; Two nested loops: the outer counter k (%0) runs over [0, 500000) and the
; inner counter j (%j.01) over [0, 500000]; every inner iteration stores
; (float)k to A[j]. Returns 0.
define i32 @main() nounwind {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc8, %entry.split
; Outer loop counter k.
%0 = phi i32 [ 0, %entry.split ], [ %inc10, %for.inc8 ]
br label %for.body4
for.body4: ; preds = %for.body4, %for.cond1.preheader
; Inner loop counter j.
%j.01 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
; A[j] = (float)k
%arrayidx = getelementptr [500000 x float]* @A, i32 0, i32 %j.01
%conv = sitofp i32 %0 to float
store float %conv, float* %arrayidx, align 4
%inc = add nsw i32 %j.01, 1
; The inner loop exits when j + 1 == 500001, i.e. the last index written is
; 500000, matching the `j <= N` bound of the C source.
%exitcond = icmp eq i32 %inc, 500001
br i1 %exitcond, label %for.inc8, label %for.body4
for.inc8: ; preds = %for.body4
%inc10 = add nsw i32 %0, 1
; The outer loop exits when k + 1 == 500000.
%exitcond3 = icmp eq i32 %inc10, 500000
br i1 %exitcond3, label %for.end11, label %for.cond1.preheader
for.end11: ; preds = %for.inc8
ret i32 0
}
; CHECK-NOT: Checking region: omp.setup

View File

@ -1,65 +0,0 @@
; RUN: opt %loadPolly -S -polly-codegen -enable-polly-openmp < %s | FileCheck %s
;
; Check that we allocate the parallel context in the entry block and use
; lifetime markers to mark the live range.
;
; CHECK: entry:
; CHECK: %polly.par.userContext = alloca { i32* }
; CHECK: br label %while.cond
;
; CHECK: polly.start:
; CHECK-NEXT: %[[BC1:[._0-9a-zA-Z]*]] = bitcast { i32* }* %polly.par.userContext to i8*
; CHECK-NEXT: call void @llvm.lifetime.start(i64 8, i8* %[[BC1]])
; CHECK-NEXT: %[[GEP:[._0-9a-zA-Z]*]] = getelementptr inbounds { i32* }* %polly.par.userContext, i32 0, i32 0
; CHECK-NEXT: store i32* %A, i32** %[[GEP]]
; CHECK-NEXT: %polly.par.userContext{{[0-9]*}} = bitcast { i32* }* %polly.par.userContext to i8*
; CHECK-NEXT: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @jd.polly.subfn, i8* %polly.par.userContext{{[0-9]*}}, i32 0, i64 0, i64 1024, i64 1)
; CHECK-NEXT: call void @jd.polly.subfn(i8* %polly.par.userContext{{[0-9]*}})
; CHECK-NEXT: call void @GOMP_parallel_end()
; CHECK-NEXT: %[[BC2:[._0-9a-zA-Z]*]] = bitcast { i32* }* %polly.par.userContext to i8*
; CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* %[[BC2]])
; CHECK-NEXT: br label %polly.merge_new_and_old
; int cond();
; void jd(int *A) {
; while (cond())
; for (int j = 0; j < 1024; j++)
; A[j] = 1;
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; While the external function @cond returns non-zero, sets A[0..1023] to 1.
; The inner for-loop is nested inside the data-dependent while-loop.
define void @jd(i32* %A) {
entry:
br label %while.cond
while.cond: ; preds = %for.end, %entry
; Re-evaluate the opaque condition before every pass over the array.
%call = call i32 (...)* @cond() #2
; A zero result ends the while-loop.
%tobool = icmp eq i32 %call, 0
br i1 %tobool, label %while.end, label %while.body
while.body: ; preds = %while.cond
br label %for.cond
for.cond: ; preds = %for.inc, %while.body
; Inner loop counter j, restarted at 0 for every while iteration.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %while.body ]
; Continue while j != 1024.
%exitcond = icmp ne i64 %indvars.iv, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[j] = 1
%arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
store i32 1, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
br label %while.cond
while.end: ; preds = %while.cond
ret void
}
declare i32 @cond(...) #1

View File

@ -1,3 +0,0 @@
# Mark the tests governed by this lit configuration as unsupported unless
# config.root.cloog_found holds one of the accepted "true" spellings.
if config.root.cloog_found not in ('TRUE', 'true', 'yes', 'YES'):
    config.unsupported = True

View File

@ -1,208 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck -check-prefix=IMPORT %s
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck -check-prefix=IMPORT %s
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-postfix=tiled -polly-import-jscop-dir=%S -polly-cloog -polly-codegen -enable-polly-openmp -analyze -disable-polly-legality < %s | FileCheck -check-prefix=TILED %s
;#define M 1024
;#define N 1024
;#define K 1024
;
;float A[M][K], B[K][N], C[M][N], X[K];
;
;float parallel_loop() {
; int i, j, k;
;
; for (i = 0; i < M; i++)
; for (j = 0; j< N; j++)
; for (k = 0; k < K; k++)
; C[i][j] += A[i][k] * B[k][j];
;
; for (i = 0; i < M; i++)
; for (j = 0; j < N; j++)
; for (k = 0; k < K; k++)
; X[k] += X[k];
;
; return C[42][42] + X[42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x [1024 x float]] zeroinitializer, align 16
@B = common global [1024 x [1024 x float]] zeroinitializer, align 16
@C = common global [1024 x [1024 x float]] zeroinitializer, align 16
@X = common global [1024 x float] zeroinitializer, align 16
; Two consecutive three-deep loop nests, each with trip count 1024 per level:
;   1. bb18 / bb20 / bb22 with body bb23:  C[i][j] += A[i][k] * B[k][j]
;   2. bb34 / bb36 / bb38 with body bb39:  X[k] += X[k]
; The function returns C[42][42] + X[42].
define float @parallel_loop() nounwind {
bb:
br label %bb18
; ---- First loop nest -------------------------------------------------------
bb18: ; preds = %bb32, %bb
; Outermost counter i of the first nest.
%indvar9 = phi i64 [ %indvar.next10, %bb32 ], [ 0, %bb ]
%exitcond15 = icmp ne i64 %indvar9, 1024
br i1 %exitcond15, label %bb19, label %bb33
bb19: ; preds = %bb18
br label %bb20
bb20: ; preds = %bb30, %bb19
; Middle counter j; the address of C[i][j] is computed here, outside the
; innermost loop.
%indvar6 = phi i64 [ %indvar.next7, %bb30 ], [ 0, %bb19 ]
%scevgep14 = getelementptr [1024 x [1024 x float]]* @C, i64 0, i64 %indvar9, i64 %indvar6
%exitcond12 = icmp ne i64 %indvar6, 1024
br i1 %exitcond12, label %bb21, label %bb31
bb21: ; preds = %bb20
br label %bb22
bb22: ; preds = %bb28, %bb21
; Innermost counter k; addresses of A[i][k] and B[k][j].
%indvar3 = phi i64 [ %indvar.next4, %bb28 ], [ 0, %bb21 ]
%scevgep11 = getelementptr [1024 x [1024 x float]]* @A, i64 0, i64 %indvar9, i64 %indvar3
%scevgep8 = getelementptr [1024 x [1024 x float]]* @B, i64 0, i64 %indvar3, i64 %indvar6
%exitcond5 = icmp ne i64 %indvar3, 1024
br i1 %exitcond5, label %bb23, label %bb29
bb23: ; preds = %bb22
; Loop body of the first nest: C[i][j] += A[i][k] * B[k][j]
%tmp = load float* %scevgep11, align 4
%tmp24 = load float* %scevgep8, align 4
%tmp25 = fmul float %tmp, %tmp24
%tmp26 = load float* %scevgep14, align 4
%tmp27 = fadd float %tmp26, %tmp25
store float %tmp27, float* %scevgep14, align 4
br label %bb28
bb28: ; preds = %bb23
%indvar.next4 = add i64 %indvar3, 1
br label %bb22
bb29: ; preds = %bb22
br label %bb30
bb30: ; preds = %bb29
%indvar.next7 = add i64 %indvar6, 1
br label %bb20
bb31: ; preds = %bb20
br label %bb32
bb32: ; preds = %bb31
%indvar.next10 = add i64 %indvar9, 1
br label %bb18
bb33: ; preds = %bb18
br label %bb34
; ---- Second loop nest ------------------------------------------------------
bb34: ; preds = %bb48, %bb33
; Outermost counter i of the second nest; it is not used by the loop body.
%i.1 = phi i32 [ 0, %bb33 ], [ %tmp49, %bb48 ]
%exitcond2 = icmp ne i32 %i.1, 1024
br i1 %exitcond2, label %bb35, label %bb50
bb35: ; preds = %bb34
br label %bb36
bb36: ; preds = %bb45, %bb35
; Middle counter j; it is not used by the loop body either.
%j.1 = phi i32 [ 0, %bb35 ], [ %tmp46, %bb45 ]
%exitcond1 = icmp ne i32 %j.1, 1024
br i1 %exitcond1, label %bb37, label %bb47
bb37: ; preds = %bb36
br label %bb38
bb38: ; preds = %bb43, %bb37
; Innermost counter k and the address of X[k].
%indvar = phi i64 [ %indvar.next, %bb43 ], [ 0, %bb37 ]
%scevgep = getelementptr [1024 x float]* @X, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %bb39, label %bb44
bb39: ; preds = %bb38
; Loop body of the second nest: X[k] += X[k] (the element is loaded twice).
%tmp40 = load float* %scevgep, align 4
%tmp41 = load float* %scevgep, align 4
%tmp42 = fadd float %tmp41, %tmp40
store float %tmp42, float* %scevgep, align 4
br label %bb43
bb43: ; preds = %bb39
%indvar.next = add i64 %indvar, 1
br label %bb38
bb44: ; preds = %bb38
br label %bb45
bb45: ; preds = %bb44
%tmp46 = add nsw i32 %j.1, 1
br label %bb36
bb47: ; preds = %bb36
br label %bb48
bb48: ; preds = %bb47
%tmp49 = add nsw i32 %i.1, 1
br label %bb34
bb50: ; preds = %bb34
; return C[42][42] + X[42]
%tmp51 = load float* getelementptr inbounds ([1024 x [1024 x float]]* @C, i64 0, i64 42, i64 42), align 8
%tmp52 = load float* getelementptr inbounds ([1024 x float]* @X, i64 0, i64 42), align 8
%tmp53 = fadd float %tmp51, %tmp52
ret float %tmp53
}
; CHECK: for (c2=0;c2<=1023;c2++) {
; CHECK: for (c4=0;c4<=1023;c4++) {
; CHECK: for (c6=0;c6<=1023;c6++) {
; CHECK: Stmt_bb23(c2,c4,c6);
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: for (c2=0;c2<=1023;c2++) {
; CHECK: for (c4=0;c4<=1023;c4++) {
; CHECK: for (c6=0;c6<=1023;c6++) {
; CHECK: Stmt_bb39(c2,c4,c6);
; CHECK: }
; CHECK: }
; CHECK: }
; CHECK: Parallel loop with iterator 'c2' generated
; CHECK: Parallel loop with iterator 'c6' generated
; CHECK-NOT: Parallel loop
; IMPORT: for (c2=0;c2<=1023;c2++) {
; IMPORT: for (c4=0;c4<=1023;c4++) {
; IMPORT: for (c6=0;c6<=1023;c6++) {
; IMPORT: Stmt_bb23(c2,c4,c6);
; IMPORT: Stmt_bb39(c2,c4,c6);
; IMPORT: }
; IMPORT: }
; IMPORT: }
; IMPORT-NOT: Parallel loop
; TILED: for (c2=0;c2<=1023;c2+=4) {
; TILED: for (c4=0;c4<=1023;c4+=4) {
; TILED: for (c6=0;c6<=1023;c6+=4) {
; TILED: for (c8=c2;c8<=c2+3;c8++) {
; TILED: for (c9=c4;c9<=c4+3;c9++) {
; TILED: for (c10=c6;c10<=c6+3;c10++) {
; TILED: Stmt_bb23(c8,c9,c10);
; TILED: }
; TILED: }
; TILED: }
; TILED: }
; TILED: }
; TILED: }
; TILED: for (c2=0;c2<=1023;c2+=4) {
; TILED: for (c4=0;c4<=1023;c4+=4) {
; TILED: for (c6=0;c6<=1023;c6+=4) {
; TILED: for (c8=c2;c8<=c2+3;c8++) {
; TILED: for (c9=c4;c9<=c4+3;c9++) {
; TILED: for (c10=c6;c10<=c6+3;c10++) {
; TILED: Stmt_bb39(c8,c9,c10);
; TILED: }
; TILED: }
; TILED: }
; TILED: }
; TILED: }
; TILED: }
; I am not sure if we actually may have parallel loops here. The dependency
; analysis does not detect any. This may however be because we do not
; correctly update the imported schedule. Add a check that hopefully fails
; after this is corrected. Or someone proves there are no parallel loops and
; we can remove this comment.
; TILED-NOT: Parallel loop

View File

@ -1,42 +0,0 @@
{
"name": "bb18 => bb50",
"context": "{ [] }",
"statements": [{
"name": "Stmt_bb23",
"domain": "{ Stmt_bb23[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }",
"schedule": "{ Stmt_bb23[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_A[1024i0 + i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_B[i1 + 1024i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_C[1024i0 + i1] }"
},
{
"kind": "write",
"relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_C[1024i0 + i1] }"
}]
},
{
"name": "Stmt_bb39",
"domain": "{ Stmt_bb39[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }",
"schedule": "{ Stmt_bb39[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, i2, 0] }",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }"
},
{
"kind": "write",
"relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }"
}]
}]
}

View File

@ -1,42 +0,0 @@
{
"name": "bb18 => bb50",
"context": "{ [] }",
"statements": [{
"name": "Stmt_bb23",
"domain": "{ Stmt_bb23[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }",
"schedule": "{ Stmt_bb23[i0, i1, i2] -> scattering[0, o0, 0, o1, 0, o2, 0, i0, i1, i2] : (exists e0, e1, e2: 4e0 = o0 and 4e1 = o1 and 4e2 = o2 and o0 <= i0 <= 3 + o0 and o1 <= i1 <= 3 + o1 and o2 <= i2 <= 3 + o2)}",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_A[1024i0 + i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_B[i1 + 1024i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_C[1024i0 + i1] }"
},
{
"kind": "write",
"relation": "{ Stmt_bb23[i0, i1, i2] -> MemRef_C[1024i0 + i1] }"
}]
},
{
"name": "Stmt_bb39",
"domain": "{ Stmt_bb39[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }",
"schedule": "{ Stmt_bb39[i0, i1, i2] -> scattering[1, o0, 0, o1, 0, o2, 0, i0, i1, i2] : (exists e0, e1, e2: 4e0 = o0 and 4e1 = o1 and 4e2 = o2 and o0 <= i0 <= 3 + o0 and o1 <= i1 <= 3 + o1 and o2 <= i2 <= 3 + o2)}",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }"
},
{
"kind": "write",
"relation": "{ Stmt_bb39[i0, i1, i2] -> MemRef_X[i2] }"
}]
}]
}

View File

@ -1,72 +0,0 @@
; RUN: opt %loadPolly -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck %s
;#define M 1024
;#define N 1024
;#define K 1024
;
;float X[K];
;
;float parallel_loop_simple() {
; int i, k;
;
; for (i = 0; i < M; i++)
; for (k = 0; k < K; k++)
; X[k] += X[k];
;
; return X[42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@X = common global [1024 x float] zeroinitializer, align 16
; Two nested loops of 1024 iterations each; the body doubles X[k] in place
; (X[k] += X[k]) and does not use the outer counter. Returns X[42].
define float @parallel_loop_simple() nounwind {
bb:
br label %bb2
bb2: ; preds = %bb10, %bb
; Outer counter i.
%i.0 = phi i32 [ 0, %bb ], [ %tmp11, %bb10 ]
%exitcond1 = icmp ne i32 %i.0, 1024
br i1 %exitcond1, label %bb3, label %bb12
bb3: ; preds = %bb2
br label %bb4
bb4: ; preds = %bb8, %bb3
; Inner counter k and the address of X[k].
%indvar = phi i64 [ %indvar.next, %bb8 ], [ 0, %bb3 ]
%scevgep = getelementptr [1024 x float]* @X, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %bb5, label %bb9
bb5: ; preds = %bb4
; Loop body: X[k] += X[k] (the element is loaded twice).
%tmp = load float* %scevgep, align 4
%tmp6 = load float* %scevgep, align 4
%tmp7 = fadd float %tmp6, %tmp
store float %tmp7, float* %scevgep, align 4
br label %bb8
bb8: ; preds = %bb5
%indvar.next = add i64 %indvar, 1
br label %bb4
bb9: ; preds = %bb4
br label %bb10
bb10: ; preds = %bb9
%tmp11 = add nsw i32 %i.0, 1
br label %bb2
bb12: ; preds = %bb2
; return X[42]
%tmp13 = load float* getelementptr inbounds ([1024 x float]* @X, i64 0, i64 42), align 8
ret float %tmp13
}
; CHECK: for (c2=0;c2<=1023;c2++) {
; CHECK: for (c4=0;c4<=1023;c4++) {
; CHECK: Stmt_bb5(c2,c4);
; CHECK: }
; CHECK: }
; CHECK: Parallel loop with iterator 'c4' generated
; CHECK-NOT: Parallel loop

View File

@ -1,80 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck %s
;#define N 1024
;
;float C[N], X[N];
;
;float parallel_loop_simple2() {
; int j;
;
; for (j = 0; j < N; j++)
; C[j] = j;
;
; for (j = 0; j < N; j++)
; X[j] += X[j];
;
; return C[42] + X[42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@C = common global [1024 x float] zeroinitializer, align 16
@X = common global [1024 x float] zeroinitializer, align 16
; Two consecutive single loops of 1024 iterations each:
;   1. bb5 / bb6:   C[j] = (float)j
;   2. bb9 / bb10:  X[j] += X[j]
; Returns C[42] + X[42].
define float @parallel_loop_simple2() nounwind {
bb:
br label %bb5
bb5: ; preds = %bb7, %bb
; Counter of the first loop, the address of C[j], and j truncated to i32 for
; the int-to-float conversion in the body.
%indvar1 = phi i64 [ %indvar.next2, %bb7 ], [ 0, %bb ]
%scevgep4 = getelementptr [1024 x float]* @C, i64 0, i64 %indvar1
%j.0 = trunc i64 %indvar1 to i32
%exitcond3 = icmp ne i64 %indvar1, 1024
br i1 %exitcond3, label %bb6, label %bb8
bb6: ; preds = %bb5
; Loop body: C[j] = (float)j
%tmp = sitofp i32 %j.0 to float
store float %tmp, float* %scevgep4, align 4
br label %bb7
bb7: ; preds = %bb6
%indvar.next2 = add i64 %indvar1, 1
br label %bb5
bb8: ; preds = %bb5
br label %bb9
bb9: ; preds = %bb14, %bb8
; Counter of the second loop and the address of X[j].
%indvar = phi i64 [ %indvar.next, %bb14 ], [ 0, %bb8 ]
%scevgep = getelementptr [1024 x float]* @X, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %bb10, label %bb15
bb10: ; preds = %bb9
; Loop body: X[j] += X[j] (the element is loaded twice).
%tmp11 = load float* %scevgep, align 4
%tmp12 = load float* %scevgep, align 4
%tmp13 = fadd float %tmp12, %tmp11
store float %tmp13, float* %scevgep, align 4
br label %bb14
bb14: ; preds = %bb10
%indvar.next = add i64 %indvar, 1
br label %bb9
bb15: ; preds = %bb9
; return C[42] + X[42]
%tmp16 = load float* getelementptr inbounds ([1024 x float]* @C, i64 0, i64 42), align 8
%tmp17 = load float* getelementptr inbounds ([1024 x float]* @X, i64 0, i64 42), align 8
%tmp18 = fadd float %tmp16, %tmp17
ret float %tmp18
}
; CHECK: for (c2=0;c2<=1023;c2++) {
; CHECK: Stmt_bb6(c2);
; CHECK: }
; CHECK: for (c2=0;c2<=1023;c2++) {
; CHECK: Stmt_bb10(c2);
; CHECK: }
; CHECK: Parallel loop with iterator 'c2' generated
; CHECK: Parallel loop with iterator 'c2' generated
; CHECK-NOT: Parallel loop

View File

@ -1,36 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s -enable-polly-openmp -S | FileCheck %s
;
; This test case implements the following code:
;
; for (i = 0; i < 1024; i++)
; A[i] = A[i] * param
;
; The problem is that 'param' is not referenced in any subscript or loop
; bound, but it must still be forwarded to the OpenMP subfunction.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Multiplies every element A[0..1023] by %param in place. %param appears only
; in the fmul, never in an address computation or a loop bound.
define void @foo(double %param, [1024 x double]* %A) {
entry:
br label %for.preheader
for.preheader:
br label %for.body
for.body:
; Loop counter i.
%indvar = phi i64 [ 0, %for.preheader ], [ %indvar.next, %for.inc ]
; A[i] = param * A[i]
%arrayidx = getelementptr [1024 x double]* %A, i64 0, i64 %indvar
%val = load double* %arrayidx
%mul = fmul double %param, %val
store double %mul, double* %arrayidx, align 8
br label %for.inc
for.inc:
%indvar.next = add i64 %indvar, 1
; Exit after the iteration with i == 1023.
%exitcond = icmp eq i64 %indvar.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; CHECK: @foo.polly.subfn

View File

@ -1,92 +0,0 @@
; RUN: opt %loadPolly -polly-codegen -enable-polly-openmp -verify-dom-info -S < %s | FileCheck %s
;#include <string.h>
;#define N 10
;
;double A[N];
;double B[N];
;
;void loop_openmp() {
; for (int i = 0; i < N; i++) {
; for (int j = 0; j < N; j++) {
; A[j] += j;
; }
; }
;}
;
;int main () {
; memset(A, 0, sizeof(float) * N);
;
; loop_openmp();
;
; return 0;
;}
;
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
@A = common global [10 x double] zeroinitializer, align 4
@B = common global [10 x double] zeroinitializer, align 4
; Two nested loops of 10 iterations each; the body performs
; A[j] += (double)j and does not use the outer counter.
define void @loop_openmp() nounwind {
entry:
br label %for.cond
for.cond: ; preds = %for.inc10, %entry
; Outer counter i.
%i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc10 ]
%exitcond1 = icmp ne i32 %i.0, 10
br i1 %exitcond1, label %for.body, label %for.end13
for.body: ; preds = %for.cond
br label %for.cond2
for.cond2: ; preds = %for.inc, %for.body
; Inner counter j (named %tmp) and the address of A[j].
%tmp = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%arrayidx = getelementptr [10 x double]* @A, i32 0, i32 %tmp
%exitcond = icmp ne i32 %tmp, 10
br i1 %exitcond, label %for.body5, label %for.end
for.body5: ; preds = %for.cond2
; Loop body: A[j] += (double)j
%conv = sitofp i32 %tmp to double
%tmp8 = load double* %arrayidx, align 4
%add = fadd double %tmp8, %conv
store double %add, double* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body5
%inc = add nsw i32 %tmp, 1
br label %for.cond2
for.end: ; preds = %for.cond2
br label %for.inc10
for.inc10: ; preds = %for.end
%inc12 = add nsw i32 %i.0, 1
br label %for.cond
for.end13: ; preds = %for.cond
ret void
}
; Entry point: zeroes the first 40 bytes of @A, runs @loop_openmp and
; returns 0. The length 40 corresponds to `sizeof(float) * N` in the C source
; quoted at the top of the test, although @A is declared as [10 x double].
define i32 @main() nounwind {
entry:
call void @llvm.memset.p0i8.i32(i8* bitcast ([10 x double]* @A to i8*), i8 0, i32 40, i32 4, i1 false)
call void @loop_openmp()
ret i32 0
}
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
; CHECK: %polly.par.userContext = alloca { i32 }
; CHECK: %[[NO:[._a-zA-Z0-9]*]] = getelementptr inbounds { i32 }* %polly.par.userContext, i32 0, i32 0
; CHECK: store i32 %polly.indvar, i32* %[[NO]]
; CHECK: %[[DATA:[._a-zA-Z0-9]*]] = bitcast { i32 }* %polly.par.userContext to i8*
; CHECK: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.polly.subfn, i8* %[[DATA]], i32 0, i32 0, i32 10, i32 1)
; CHECK: call void @loop_openmp.polly.subfn(i8* %[[DATA]])
; CHECK: call void @GOMP_parallel_end()
; Verify the new subfunction is annotated such that SCoP detection will skip it.
; CHECK: @loop_openmp.polly.subfn({{.*}}) [[ATTR:#[0-9]+]]
; CHECK: attributes [[ATTR]] = {{{[^\}]*}}polly.skip.fn{{[^\}]*}}}

View File

@ -1,105 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-openmp -S < %s | FileCheck %s
;#include <string.h>
;#include <stdio.h>
;#define N 5
;
;float A[N];
;float B[N];
;
;void loop1_openmp() {
; for (int i = 0; i <= N; i++)
; A[i] = 0;
;
; for (int j = 0; j <= N; j++)
; for (int k = 0; k <= N; k++)
; B[k] += j;
;}
;
;int main () {
; int i;
; memset(A, 0, sizeof(float) * N);
; memset(B, 0, sizeof(float) * N);
;
; loop1_openmp();
;
; return 0;
;}
;
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
@A = common global [5 x float] zeroinitializer, align 4
@B = common global [5 x float] zeroinitializer, align 4
; A single loop followed by a two-deep loop nest, every loop with 6 iterations
; (counters 0..5, matching the `<= N` bounds of the C source with N = 5):
;   1. for.cond / for.body:               A[i] = 0
;   2. for.cond4 / for.cond9 / for.body12: B[k] += (float)j
; Note that @A and @B are declared with 5 elements while the counters reach 5.
define void @loop1_openmp() nounwind {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Counter i of the first loop and the address of A[i].
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%arrayidx = getelementptr [5 x float]* @A, i32 0, i32 %i.0
%exitcond2 = icmp ne i32 %i.0, 6
br i1 %exitcond2, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[i] = 0
store float 0.000000e+00, float* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
br label %for.cond4
for.cond4: ; preds = %for.inc21, %for.end
; Outer counter j of the second nest (named %tmp).
%tmp = phi i32 [ 0, %for.end ], [ %inc23, %for.inc21 ]
%exitcond1 = icmp ne i32 %tmp, 6
br i1 %exitcond1, label %for.body7, label %for.end24
for.body7: ; preds = %for.cond4
br label %for.cond9
for.cond9: ; preds = %for.inc17, %for.body7
; Inner counter k and the address of B[k].
%k.0 = phi i32 [ 0, %for.body7 ], [ %inc19, %for.inc17 ]
%arrayidx15 = getelementptr [5 x float]* @B, i32 0, i32 %k.0
%exitcond = icmp ne i32 %k.0, 6
br i1 %exitcond, label %for.body12, label %for.end20
for.body12: ; preds = %for.cond9
; B[k] += (float)j -- the outer counter is used inside the inner loop.
%conv = sitofp i32 %tmp to float
%tmp16 = load float* %arrayidx15, align 4
%add = fadd float %tmp16, %conv
store float %add, float* %arrayidx15, align 4
br label %for.inc17
for.inc17: ; preds = %for.body12
%inc19 = add nsw i32 %k.0, 1
br label %for.cond9
for.end20: ; preds = %for.cond9
br label %for.inc21
for.inc21: ; preds = %for.end20
%inc23 = add nsw i32 %tmp, 1
br label %for.cond4
for.end24: ; preds = %for.cond4
ret void
}
; Entry point: zeroes 20 bytes of @A and of @B, runs @loop1_openmp and
; returns 0.
define i32 @main() nounwind {
entry:
call void @llvm.memset.p0i8.i32(i8* bitcast ([5 x float]* @A to i8*), i8 0, i32 20, i32 4, i1 false)
call void @llvm.memset.p0i8.i32(i8* bitcast ([5 x float]* @B to i8*), i8 0, i32 20, i32 4, i1 false)
call void @loop1_openmp()
ret i32 0
}
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
; CHECK-DAG: %polly.par.userContext{{[0-9]*}} = alloca {}
; CHECK-DAG: %polly.par.userContext{{[0-9]*}} = alloca { i32 }

View File

@ -1,81 +0,0 @@
; RUN: opt %loadPolly -mem2reg -polly-codegen -enable-polly-openmp -S < %s
;#include <string.h>
;#define N 10240000
;
;float A[N];
;float B[N];
;
;void loop1_openmp() {
; for (int i = 0; i <= N; i++)
; A[i] = 0;
; for (int j = 0; j <= N; j++)
; B[j] = 0;
;}
;
;
;int main () {
; int i;
; memset(A, 0, sizeof(float) * N);
; memset(B, 1, sizeof(float) * N);
;
; loop1_openmp();
;
; return 0;
;}
;
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
@A = common global [10240000 x float] zeroinitializer, align 4
@B = common global [10240000 x float] zeroinitializer, align 4
; Two consecutive loops, each running its counter from 0 up to and including
; 10240000 (exit when the counter equals 10240001):
;   1. for.cond / for.body:    A[i] = 0
;   2. for.cond4 / for.body7:  B[j] = 0
; Note that @A and @B are declared with 10240000 elements.
define void @loop1_openmp() nounwind {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Counter i of the first loop and the address of A[i].
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%arrayidx = getelementptr [10240000 x float]* @A, i32 0, i32 %i.0
%exitcond1 = icmp ne i32 %i.0, 10240001
br i1 %exitcond1, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[i] = 0
store float 0.000000e+00, float* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
br label %for.cond4
for.cond4: ; preds = %for.inc10, %for.end
; Counter j of the second loop and the address of B[j].
%j.0 = phi i32 [ 0, %for.end ], [ %inc12, %for.inc10 ]
%arrayidx9 = getelementptr [10240000 x float]* @B, i32 0, i32 %j.0
%exitcond = icmp ne i32 %j.0, 10240001
br i1 %exitcond, label %for.body7, label %for.end13
for.body7: ; preds = %for.cond4
; B[j] = 0
store float 0.000000e+00, float* %arrayidx9, align 4
br label %for.inc10
for.inc10: ; preds = %for.body7
%inc12 = add nsw i32 %j.0, 1
br label %for.cond4
for.end13: ; preds = %for.cond4
ret void
}
; Entry point: fills @A with byte value 0 and @B with byte value 1
; (40960000 bytes each), runs @loop1_openmp and returns 0.
define i32 @main() nounwind {
entry:
call void @llvm.memset.p0i8.i32(i8* bitcast ([10240000 x float]* @A to i8*), i8 0, i32 40960000, i32 4, i1 false)
call void @llvm.memset.p0i8.i32(i8* bitcast ([10240000 x float]* @B to i8*), i8 1, i32 40960000, i32 4, i1 false)
call void @loop1_openmp()
ret i32 0
}
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind

View File

@ -1,48 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-vectorizer=polly -enable-polly-openmp -polly-opt-isl -polly-codegen < %s
; void f(int *A, int a, int b) {
; int local = a > b ? a : b;
; int i;
; for (i = 0; i < 100; i++) {
; A[i] += local;
; }
; }
;
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
; Selects the larger of %a and %b before the loop and then adds that value to
; every element A[0..99].
define void @f(i32* %A, i32 %a, i32 %b) {
entry:
; local = a > b ? a : b, expressed as a branch diamond plus a phi.
%cmp = icmp sgt i32 %a, %b
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
br label %cond.end
cond.false: ; preds = %entry
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
; %cond is defined outside the loop and used inside it.
%cond = phi i32 [ %a, %cond.true ], [ %b, %cond.false ]
br label %for.cond
for.cond: ; preds = %for.inc, %cond.end
; Loop counter i; continue while i != 100.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %cond.end ]
%exitcond = icmp ne i64 %indvars.iv, 100
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[i] += local
%arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %cond
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -1,104 +0,0 @@
; RUN: opt %loadPolly -polly-codegen < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; Named types that appear in the pthread signatures used by the aliases and
; stub functions below.
%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
%union.pthread_attr_t = type { i64, [12 x i32] }
%union.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
%union.pthread_mutexattr_t = type { i32 }
; Weak aliases: every @_ZL..__gthrw_pthread_* symbol is an alias of the
; same-named @pthread_* function defined later in this module. None of the
; aliases has a use inside the module (see the [#uses=0] annotations).
@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = weak alias i32 (i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%union.pthread_mutex_t*)*> [#uses=0]
@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%union.pthread_mutex_t*)*> [#uses=0]
@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%union.pthread_mutex_t*)*> [#uses=0]
@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutex_t*, %union.pthread_mutexattr_t*)* @pthread_mutex_init ; <i32 (%union.pthread_mutex_t*, %union.pthread_mutexattr_t*)*> [#uses=0]
@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_init ; <i32 (%union.pthread_mutexattr_t*)*> [#uses=0]
@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%union.pthread_mutexattr_t*, i32)* @pthread_mutexattr_settype ; <i32 (%union.pthread_mutexattr_t*, i32)*> [#uses=0]
@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_destroy ; <i32 (%union.pthread_mutexattr_t*)*> [#uses=0]
; Control-flow-only function: it contains no loads, stores, or calls. Its
; branch conditions are either undef or the constant false, and the result of
; its single phi node is never used.
define void @_ZL6createP6node_tii3v_tS1_d() {
entry:
br i1 undef, label %bb, label %bb5
bb: ; preds = %entry
; Constant-false condition: control always continues at %bb3, so the %bb1
; edge below is never taken.
br i1 false, label %bb1, label %bb3
bb1: ; preds = %bb
br label %bb3
bb3: ; preds = %bb1, %bb
; Merges undef (from %bb1) with the constant 1 (from %bb); the value is dead.
%iftmp.99.0 = phi i64 [ undef, %bb1 ], [ 1, %bb ] ; <i64> [#uses=0]
br label %bb5
bb5: ; preds = %bb3, %entry
br i1 undef, label %return, label %bb7
bb7: ; preds = %bb5
; This path ends in an unreachable terminator instead of returning.
unreachable
return: ; preds = %bb5
ret void
}
; Stub definitions of the pthread entry points that the weak
; @_ZL..__gthrw_pthread_* aliases at the top of this module refer to.
; Every stub leaves its arguments unnamed and unused and immediately returns a
; constant: 0 for the i32-returning functions, null for @pthread_getspecific.
define i32 @pthread_once(i32*, void ()*) {
ret i32 0
}
; The only stub with a pointer return type; it returns null.
define i8* @pthread_getspecific(i32) {
ret i8* null
}
define i32 @pthread_setspecific(i32, i8*) {
ret i32 0
}
define i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*) {
ret i32 0
}
define i32 @pthread_cancel(i64) {
ret i32 0
}
; Mutex stubs: lock, trylock, unlock, and init all return 0 without touching
; the mutex object they are handed.
define i32 @pthread_mutex_lock(%union.pthread_mutex_t*) {
ret i32 0
}
define i32 @pthread_mutex_trylock(%union.pthread_mutex_t*) {
ret i32 0
}
define i32 @pthread_mutex_unlock(%union.pthread_mutex_t*) {
ret i32 0
}
define i32 @pthread_mutex_init(%union.pthread_mutex_t*, %union.pthread_mutexattr_t*) {
ret i32 0
}
; Thread-specific-key stubs.
define i32 @pthread_key_create(i32*, void (i8*)*) {
ret i32 0
}
define i32 @pthread_key_delete(i32) {
ret i32 0
}
; Mutex-attribute stubs.
define i32 @pthread_mutexattr_init(%union.pthread_mutexattr_t*) {
ret i32 0
}
define i32 @pthread_mutexattr_settype(%union.pthread_mutexattr_t*, i32) {
ret i32 0
}
define i32 @pthread_mutexattr_destroy(%union.pthread_mutexattr_t*) {
ret i32 0
}

View File

@ -1,56 +0,0 @@
;RUN: opt %loadPolly -polly-prepare -polly-detect-scops-in-regions-without-loops -polly-detect-scops-in-functions-without-loops -polly-cloog -analyze < %s | FileCheck %s
;#include <string.h>
;int A[1];
;
;void constant_condition () {
; int a = 0;
; int b = 0;
;
; if (a == b)
; A[0] = 0;
; else
; A[0] = 1;
;}
;
;int main () {
; int i;
;
; A[0] = 2;
;
; constant_condition();
;
; return A[0];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; One-element i32 array (int A[1] in the C listing above). It is stored to by
; both @constant_condition and @main and loaded back in @main.
@A = common global [1 x i32] zeroinitializer, align 4 ; <[1 x i32]*> [#uses=1]
; IR for constant_condition() from the C listing above.
; The comparison of the two zero constants is computed but never used; the
; branch condition is the literal true, so %bb1 (store 0 to A[0]) is the taken
; successor and %bb2 (store 1 to A[0]) is never entered. The check line at the
; end of this file expects exactly one statement, named after %bb1.
define void @constant_condition() nounwind {
bb:
%tmp = icmp eq i32 0, 0 ; <i1> [#uses=0]
br i1 true, label %bb1, label %bb2
bb1: ; preds = %bb
; "then" side of the C if: A[0] = 0.
store i32 0, i32* getelementptr inbounds ([1 x i32]* @A, i32 0, i32 0)
br label %bb3
bb2: ; preds = %bb
; "else" side of the C if: A[0] = 1.
store i32 1, i32* getelementptr inbounds ([1 x i32]* @A, i32 0, i32 0)
br label %bb3
bb3: ; preds = %bb2, %bb1
ret void
}
; IR for main() from the C listing above: stores 2 to A[0], calls
; @constant_condition, then reloads A[0] and returns the loaded value.
define i32 @main() nounwind {
bb:
store i32 2, i32* getelementptr inbounds ([1 x i32]* @A, i32 0, i32 0)
call void @constant_condition()
%tmp = load i32* getelementptr inbounds ([1 x i32]* @A, i32 0, i32 0) ; <i32> [#uses=1]
ret i32 %tmp
}
; CHECK: Stmt_bb1();

View File

@ -1,303 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -analyze < %s | FileCheck %s
;#define M 36
;#define N 36
;#define K 36
;#define alpha 1
;#define beta 1
;double A[M][K+13];
;double B[K][N+13];
;double C[M][N+13];
;
;#include <stdio.h>
;
;void init_array()
;{
; int i, j;
;
; for (i=0; i<N; i++) {
; for (j=0; j<N; j++) {
; A[i][j] = (i + j);
; // We do not want to optimize this.
; __sync_synchronize();
; B[i][j] = (double)(i*j);
; C[i][j] = 0.0;
; }
; }
;}
;
;
;void print_array()
;{
; int i, j;
;
; for (i=0; i<N; i++) {
; for (j=0; j<N; j++) {
; fprintf(stdout, "%lf ", C[i][j]);
; if (j%80 == 79) fprintf(stdout, "\n");
; }
; fprintf(stdout, "\n");
; }
;}
;
;
;void do_pluto_matmult(void) {
; int i, j, k;
;
; __sync_synchronize();
; i = 0;
; do {
; j = 0;
; do {
; k = 0;
; do {
; C[i][j] = beta*C[i][j] + alpha*A[i][k] * B[k][j];
; ++k;
; } while (k < K);
; ++j;
; } while (j < N);
; ++i;
; } while (i < M);
; __sync_synchronize();
;}
;
;int main()
;{
; register double s;
;
; init_array();
;
;#pragma scop
; do_pluto_matmult();
;#pragma endscop
; print_array();
;
; return 0;
;}
; RUN: opt %loadPolly -basicaa -polly-codegen -disable-output < %s
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -analyze < %s | FileCheck -check-prefix=IMPORT %s
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=valid_reverse -polly-cloog -analyze < %s | FileCheck -check-prefix=REVERSE %s > /dev/null
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=invalid_reverse -polly-cloog -analyze < %s 2>&1 | FileCheck -check-prefix=INVALID %s > /dev/null
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -analyze < %s | FileCheck -check-prefix=IMPORT %s
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen -S < %s | FileCheck -check-prefix=CODEGEN %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; Types needed only for the type of @stdout and the signature of @fprintf.
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
; The three matrices of the C listing above. With M = N = K = 36 each one has
; 36 rows of 36 + 13 = 49 doubles.
@A = common global [36 x [49 x double]] zeroinitializer, align 8 ; <[36 x [49 x double]]*> [#uses=3]
@B = common global [36 x [49 x double]] zeroinitializer, align 8 ; <[36 x [49 x double]]*> [#uses=3]
@C = common global [36 x [49 x double]] zeroinitializer, align 8 ; <[36 x [49 x double]]*> [#uses=4]
@stdout = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=3]
; Format strings used by @print_array: "%lf " for one element, "\n" (0x0A) for
; line breaks.
@.str = private constant [5 x i8] c"%lf \00" ; <[5 x i8]*> [#uses=1]
@.str1 = private constant [2 x i8] c"\0A\00" ; <[2 x i8]*> [#uses=1]
; IR for init_array() from the C listing above.
; Two nested top-tested loops, each running its induction variable from 0 up
; to (not including) 36. For every (i, j) the body stores (double)(i + j) to
; A[i][j], executes a seq_cst fence, then stores (double)(i * j) to B[i][j]
; and 0.0 to C[i][j].
define void @init_array() nounwind {
entry:
br label %for.cond
for.cond: ; preds = %for.inc29, %entry
; Outer induction variable (i in the C listing).
%indvar1 = phi i64 [ %indvar.next2, %for.inc29 ], [ 0, %entry ] ; <i64> [#uses=7]
%exitcond6 = icmp ne i64 %indvar1, 36 ; <i1> [#uses=1]
br i1 %exitcond6, label %for.body, label %for.end32
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
; Inner induction variable (j in the C listing). The stored values and all
; three element addresses are computed here in the loop header, ahead of the
; exit test, and are only consumed in %for.body4.
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ] ; <i64> [#uses=7]
%tmp7 = add i64 %indvar1, %indvar ; <i64> [#uses=1]
%add = trunc i64 %tmp7 to i32 ; <i32> [#uses=1]
%arrayidx10 = getelementptr [36 x [49 x double]]* @A, i64 0, i64 %indvar1, i64 %indvar ; <double*> [#uses=1]
%tmp9 = mul i64 %indvar1, %indvar ; <i64> [#uses=1]
%mul = trunc i64 %tmp9 to i32 ; <i32> [#uses=1]
%arrayidx20 = getelementptr [36 x [49 x double]]* @B, i64 0, i64 %indvar1, i64 %indvar ; <double*> [#uses=1]
%arrayidx27 = getelementptr [36 x [49 x double]]* @C, i64 0, i64 %indvar1, i64 %indvar ; <double*> [#uses=1]
%exitcond = icmp ne i64 %indvar, 36 ; <i1> [#uses=1]
br i1 %exitcond, label %for.body4, label %for.end
for.body4: ; preds = %for.cond1
%conv = sitofp i32 %add to double ; <double> [#uses=1]
store double %conv, double* %arrayidx10
; Sits where the C listing calls __sync_synchronize(), whose comment there
; reads "We do not want to optimize this."
fence seq_cst
%conv13 = sitofp i32 %mul to double ; <double> [#uses=1]
store double %conv13, double* %arrayidx20
store double 0.000000e+00, double* %arrayidx27
br label %for.inc
for.inc: ; preds = %for.body4
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc29
for.inc29: ; preds = %for.end
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1]
br label %for.cond
for.end32: ; preds = %for.cond
ret void
}
; IR for print_array() from the C listing above.
; Two nested top-tested loops over 0..35. Each C[i][j] is printed to the FILE*
; loaded from @stdout using the "%lf " format; a newline is printed after every
; row, and additionally inside a row whenever the i32 column counter equals 79.
define void @print_array() nounwind {
entry:
br label %for.cond
for.cond: ; preds = %for.inc18, %entry
; Row index.
%indvar1 = phi i64 [ %indvar.next2, %for.inc18 ], [ 0, %entry ] ; <i64> [#uses=3]
%exitcond3 = icmp ne i64 %indvar1, 36 ; <i1> [#uses=1]
br i1 %exitcond3, label %for.body, label %for.end21
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
; Column index, kept twice: %indvar (i64) drives addressing and the exit test,
; %j.0 (i32) feeds the line-wrap comparison below.
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ] ; <i64> [#uses=3]
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] ; <i32> [#uses=2]
%arrayidx9 = getelementptr [36 x [49 x double]]* @C, i64 0, i64 %indvar1, i64 %indvar ; <double*> [#uses=1]
%exitcond = icmp ne i64 %indvar, 36 ; <i1> [#uses=1]
br i1 %exitcond, label %for.body4, label %for.end
for.body4: ; preds = %for.cond1
%tmp5 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1]
%tmp10 = load double* %arrayidx9 ; <double> [#uses=1]
%call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %tmp5, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %tmp10) ; <i32> [#uses=0]
; The C listing tests j % 80 == 79; here the counter is compared with 79
; directly, with no remainder operation.
%cmp12 = icmp eq i32 %j.0, 79 ; <i1> [#uses=1]
br i1 %cmp12, label %if.then, label %if.end
if.then: ; preds = %for.body4
%tmp13 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1]
%call14 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %tmp13, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) ; <i32> [#uses=0]
br label %if.end
if.end: ; preds = %if.then, %for.body4
br label %for.inc
for.inc: ; preds = %if.end
%inc = add nsw i32 %j.0, 1 ; <i32> [#uses=1]
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %for.cond1
for.end: ; preds = %for.cond1
; End of row: emit the newline string.
%tmp16 = load %struct._IO_FILE** @stdout ; <%struct._IO_FILE*> [#uses=1]
%call17 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %tmp16, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) ; <i32> [#uses=0]
br label %for.inc18
for.inc18: ; preds = %for.end
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1]
br label %for.cond
for.end21: ; preds = %for.cond
ret void
}
; External variadic declaration; the only callers in this module are the three
; call sites in @print_array.
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
; IR for do_pluto_matmult() from the C listing above.
; Three nested bottom-tested (do/while) loops. Each exit compare uses the
; already-incremented induction variable against 36, so every level executes
; its body for 0..35. The innermost block %do.body2 performs
;   C[i][j] = 1.0 * C[i][j] + (1.0 * A[i][k]) * B[k][j]
; and is the statement the check lines at the end of this file refer to as
; Stmt_do_body2. A seq_cst fence brackets the loop nest at entry and exit.
define void @do_pluto_matmult() nounwind {
entry:
fence seq_cst
br label %do.body
do.body: ; preds = %do.cond42, %entry
; Outermost induction variable (i): row of C and A.
%indvar3 = phi i64 [ %indvar.next4, %do.cond42 ], [ 0, %entry ] ; <i64> [#uses=3]
br label %do.body1
do.body1: ; preds = %do.cond36, %do.body
; Middle induction variable (j): column of C and B. The address of C[i][j] is
; computed once here and reused for both the load and the store in %do.body2.
%indvar1 = phi i64 [ %indvar.next2, %do.cond36 ], [ 0, %do.body ] ; <i64> [#uses=3]
%arrayidx5 = getelementptr [36 x [49 x double]]* @C, i64 0, i64 %indvar3, i64 %indvar1 ; <double*> [#uses=2]
br label %do.body2
do.body2: ; preds = %do.cond, %do.body1
; Innermost induction variable (k): column of A, row of B.
%indvar = phi i64 [ %indvar.next, %do.cond ], [ 0, %do.body1 ] ; <i64> [#uses=3]
%arrayidx13 = getelementptr [36 x [49 x double]]* @A, i64 0, i64 %indvar3, i64 %indvar ; <double*> [#uses=1]
%arrayidx22 = getelementptr [36 x [49 x double]]* @B, i64 0, i64 %indvar, i64 %indvar1 ; <double*> [#uses=1]
; The two multiplications by 1.0 correspond to beta and alpha in the C listing
; (both #defined as 1).
%tmp6 = load double* %arrayidx5 ; <double> [#uses=1]
%mul = fmul double 1.000000e+00, %tmp6 ; <double> [#uses=1]
%tmp14 = load double* %arrayidx13 ; <double> [#uses=1]
%mul15 = fmul double 1.000000e+00, %tmp14 ; <double> [#uses=1]
%tmp23 = load double* %arrayidx22 ; <double> [#uses=1]
%mul24 = fmul double %mul15, %tmp23 ; <double> [#uses=1]
%add = fadd double %mul, %mul24 ; <double> [#uses=1]
store double %add, double* %arrayidx5
br label %do.cond
do.cond: ; preds = %do.body2
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
%exitcond = icmp ne i64 %indvar.next, 36 ; <i1> [#uses=1]
br i1 %exitcond, label %do.body2, label %do.end
do.end: ; preds = %do.cond
br label %do.cond36
do.cond36: ; preds = %do.end
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=2]
%exitcond5 = icmp ne i64 %indvar.next2, 36 ; <i1> [#uses=1]
br i1 %exitcond5, label %do.body1, label %do.end39
do.end39: ; preds = %do.cond36
br label %do.cond42
do.cond42: ; preds = %do.end39
%indvar.next4 = add i64 %indvar3, 1 ; <i64> [#uses=2]
%exitcond6 = icmp ne i64 %indvar.next4, 36 ; <i1> [#uses=1]
br i1 %exitcond6, label %do.body, label %do.end45
do.end45: ; preds = %do.cond42
fence seq_cst
ret void
}
; IR for main() from the C listing above: initialise the matrices, run the
; multiplication kernel, print the result matrix, and return 0.
define i32 @main() nounwind {
entry:
call void @init_array()
call void @do_pluto_matmult()
call void @print_array()
ret i32 0
}
; CHECK: for (c2=0;c2<=35;c2++) {
; CHECK: for (c4=0;c4<=35;c4++) {
; CHECK: for (c6=0;c6<=35;c6++) {
; CHECK: Stmt_do_body2(c2,c4,c6);
; CHECK: }
; CHECK: }
; CHECK: }
; Do not dump the complete CLooG output. Newer CLooG versions optimize more
; in this test case.
; IMPORT: for (c2=0;c2<=35;c2+=4) {
; IMPORT: c3<=min(35,c2+3);c3++) {
; IMPORT: for (c6=0;c6<=35;c6+=4) {
; IMPORT: c7<=min(35,c6+3);c7++) {
; IMPORT: for (c10=0;c10<=35;c10+=4) {
; IMPORT: c11<=min(35,c10+3);c11++)
; IMPORT: {
; IMPORT: Stmt_do_body2(c3,c7,c11);
; IMPORT: }
; IMPORT: }
; IMPORT: }
; IMPORT: }
; IMPORT: }
; IMPORT: }
; CODEGEN: polly.stmt.do.body2
; REVERSE: for (c2=-35;c2<=0;c2++) {
; REVERSE: for (c4=-35;c4<=0;c4++) {
; REVERSE: for (c6=0;c6<=35;c6++) {
; REVERSE: Stmt_do_body2(-c2,-c4,c6);
; REVERSE: }
; REVERSE: }
; REVERSE: }
; INVALID: file contains a scattering that changes the dependences.

View File

@ -1,36 +0,0 @@
0.000000 14910.000000 29820.000000 44730.000000 59640.000000 74550.000000 89460.000000 104370.000000 119280.000000 134190.000000 149100.000000 164010.000000 178920.000000 193830.000000 208740.000000 223650.000000 238560.000000 253470.000000 268380.000000 283290.000000 298200.000000 313110.000000 328020.000000 342930.000000 357840.000000 372750.000000 387660.000000 402570.000000 417480.000000 432390.000000 447300.000000 462210.000000 477120.000000 492030.000000 506940.000000 521850.000000
0.000000 15540.000000 31080.000000 46620.000000 62160.000000 77700.000000 93240.000000 108780.000000 124320.000000 139860.000000 155400.000000 170940.000000 186480.000000 202020.000000 217560.000000 233100.000000 248640.000000 264180.000000 279720.000000 295260.000000 310800.000000 326340.000000 341880.000000 357420.000000 372960.000000 388500.000000 404040.000000 419580.000000 435120.000000 450660.000000 466200.000000 481740.000000 497280.000000 512820.000000 528360.000000 543900.000000
0.000000 16170.000000 32340.000000 48510.000000 64680.000000 80850.000000 97020.000000 113190.000000 129360.000000 145530.000000 161700.000000 177870.000000 194040.000000 210210.000000 226380.000000 242550.000000 258720.000000 274890.000000 291060.000000 307230.000000 323400.000000 339570.000000 355740.000000 371910.000000 388080.000000 404250.000000 420420.000000 436590.000000 452760.000000 468930.000000 485100.000000 501270.000000 517440.000000 533610.000000 549780.000000 565950.000000
0.000000 16800.000000 33600.000000 50400.000000 67200.000000 84000.000000 100800.000000 117600.000000 134400.000000 151200.000000 168000.000000 184800.000000 201600.000000 218400.000000 235200.000000 252000.000000 268800.000000 285600.000000 302400.000000 319200.000000 336000.000000 352800.000000 369600.000000 386400.000000 403200.000000 420000.000000 436800.000000 453600.000000 470400.000000 487200.000000 504000.000000 520800.000000 537600.000000 554400.000000 571200.000000 588000.000000
0.000000 17430.000000 34860.000000 52290.000000 69720.000000 87150.000000 104580.000000 122010.000000 139440.000000 156870.000000 174300.000000 191730.000000 209160.000000 226590.000000 244020.000000 261450.000000 278880.000000 296310.000000 313740.000000 331170.000000 348600.000000 366030.000000 383460.000000 400890.000000 418320.000000 435750.000000 453180.000000 470610.000000 488040.000000 505470.000000 522900.000000 540330.000000 557760.000000 575190.000000 592620.000000 610050.000000
0.000000 18060.000000 36120.000000 54180.000000 72240.000000 90300.000000 108360.000000 126420.000000 144480.000000 162540.000000 180600.000000 198660.000000 216720.000000 234780.000000 252840.000000 270900.000000 288960.000000 307020.000000 325080.000000 343140.000000 361200.000000 379260.000000 397320.000000 415380.000000 433440.000000 451500.000000 469560.000000 487620.000000 505680.000000 523740.000000 541800.000000 559860.000000 577920.000000 595980.000000 614040.000000 632100.000000
0.000000 18690.000000 37380.000000 56070.000000 74760.000000 93450.000000 112140.000000 130830.000000 149520.000000 168210.000000 186900.000000 205590.000000 224280.000000 242970.000000 261660.000000 280350.000000 299040.000000 317730.000000 336420.000000 355110.000000 373800.000000 392490.000000 411180.000000 429870.000000 448560.000000 467250.000000 485940.000000 504630.000000 523320.000000 542010.000000 560700.000000 579390.000000 598080.000000 616770.000000 635460.000000 654150.000000
0.000000 19320.000000 38640.000000 57960.000000 77280.000000 96600.000000 115920.000000 135240.000000 154560.000000 173880.000000 193200.000000 212520.000000 231840.000000 251160.000000 270480.000000 289800.000000 309120.000000 328440.000000 347760.000000 367080.000000 386400.000000 405720.000000 425040.000000 444360.000000 463680.000000 483000.000000 502320.000000 521640.000000 540960.000000 560280.000000 579600.000000 598920.000000 618240.000000 637560.000000 656880.000000 676200.000000
0.000000 19950.000000 39900.000000 59850.000000 79800.000000 99750.000000 119700.000000 139650.000000 159600.000000 179550.000000 199500.000000 219450.000000 239400.000000 259350.000000 279300.000000 299250.000000 319200.000000 339150.000000 359100.000000 379050.000000 399000.000000 418950.000000 438900.000000 458850.000000 478800.000000 498750.000000 518700.000000 538650.000000 558600.000000 578550.000000 598500.000000 618450.000000 638400.000000 658350.000000 678300.000000 698250.000000
0.000000 20580.000000 41160.000000 61740.000000 82320.000000 102900.000000 123480.000000 144060.000000 164640.000000 185220.000000 205800.000000 226380.000000 246960.000000 267540.000000 288120.000000 308700.000000 329280.000000 349860.000000 370440.000000 391020.000000 411600.000000 432180.000000 452760.000000 473340.000000 493920.000000 514500.000000 535080.000000 555660.000000 576240.000000 596820.000000 617400.000000 637980.000000 658560.000000 679140.000000 699720.000000 720300.000000
0.000000 21210.000000 42420.000000 63630.000000 84840.000000 106050.000000 127260.000000 148470.000000 169680.000000 190890.000000 212100.000000 233310.000000 254520.000000 275730.000000 296940.000000 318150.000000 339360.000000 360570.000000 381780.000000 402990.000000 424200.000000 445410.000000 466620.000000 487830.000000 509040.000000 530250.000000 551460.000000 572670.000000 593880.000000 615090.000000 636300.000000 657510.000000 678720.000000 699930.000000 721140.000000 742350.000000
0.000000 21840.000000 43680.000000 65520.000000 87360.000000 109200.000000 131040.000000 152880.000000 174720.000000 196560.000000 218400.000000 240240.000000 262080.000000 283920.000000 305760.000000 327600.000000 349440.000000 371280.000000 393120.000000 414960.000000 436800.000000 458640.000000 480480.000000 502320.000000 524160.000000 546000.000000 567840.000000 589680.000000 611520.000000 633360.000000 655200.000000 677040.000000 698880.000000 720720.000000 742560.000000 764400.000000
0.000000 22470.000000 44940.000000 67410.000000 89880.000000 112350.000000 134820.000000 157290.000000 179760.000000 202230.000000 224700.000000 247170.000000 269640.000000 292110.000000 314580.000000 337050.000000 359520.000000 381990.000000 404460.000000 426930.000000 449400.000000 471870.000000 494340.000000 516810.000000 539280.000000 561750.000000 584220.000000 606690.000000 629160.000000 651630.000000 674100.000000 696570.000000 719040.000000 741510.000000 763980.000000 786450.000000
0.000000 23100.000000 46200.000000 69300.000000 92400.000000 115500.000000 138600.000000 161700.000000 184800.000000 207900.000000 231000.000000 254100.000000 277200.000000 300300.000000 323400.000000 346500.000000 369600.000000 392700.000000 415800.000000 438900.000000 462000.000000 485100.000000 508200.000000 531300.000000 554400.000000 577500.000000 600600.000000 623700.000000 646800.000000 669900.000000 693000.000000 716100.000000 739200.000000 762300.000000 785400.000000 808500.000000
0.000000 23730.000000 47460.000000 71190.000000 94920.000000 118650.000000 142380.000000 166110.000000 189840.000000 213570.000000 237300.000000 261030.000000 284760.000000 308490.000000 332220.000000 355950.000000 379680.000000 403410.000000 427140.000000 450870.000000 474600.000000 498330.000000 522060.000000 545790.000000 569520.000000 593250.000000 616980.000000 640710.000000 664440.000000 688170.000000 711900.000000 735630.000000 759360.000000 783090.000000 806820.000000 830550.000000
0.000000 24360.000000 48720.000000 73080.000000 97440.000000 121800.000000 146160.000000 170520.000000 194880.000000 219240.000000 243600.000000 267960.000000 292320.000000 316680.000000 341040.000000 365400.000000 389760.000000 414120.000000 438480.000000 462840.000000 487200.000000 511560.000000 535920.000000 560280.000000 584640.000000 609000.000000 633360.000000 657720.000000 682080.000000 706440.000000 730800.000000 755160.000000 779520.000000 803880.000000 828240.000000 852600.000000
0.000000 24990.000000 49980.000000 74970.000000 99960.000000 124950.000000 149940.000000 174930.000000 199920.000000 224910.000000 249900.000000 274890.000000 299880.000000 324870.000000 349860.000000 374850.000000 399840.000000 424830.000000 449820.000000 474810.000000 499800.000000 524790.000000 549780.000000 574770.000000 599760.000000 624750.000000 649740.000000 674730.000000 699720.000000 724710.000000 749700.000000 774690.000000 799680.000000 824670.000000 849660.000000 874650.000000
0.000000 25620.000000 51240.000000 76860.000000 102480.000000 128100.000000 153720.000000 179340.000000 204960.000000 230580.000000 256200.000000 281820.000000 307440.000000 333060.000000 358680.000000 384300.000000 409920.000000 435540.000000 461160.000000 486780.000000 512400.000000 538020.000000 563640.000000 589260.000000 614880.000000 640500.000000 666120.000000 691740.000000 717360.000000 742980.000000 768600.000000 794220.000000 819840.000000 845460.000000 871080.000000 896700.000000
0.000000 26250.000000 52500.000000 78750.000000 105000.000000 131250.000000 157500.000000 183750.000000 210000.000000 236250.000000 262500.000000 288750.000000 315000.000000 341250.000000 367500.000000 393750.000000 420000.000000 446250.000000 472500.000000 498750.000000 525000.000000 551250.000000 577500.000000 603750.000000 630000.000000 656250.000000 682500.000000 708750.000000 735000.000000 761250.000000 787500.000000 813750.000000 840000.000000 866250.000000 892500.000000 918750.000000
0.000000 26880.000000 53760.000000 80640.000000 107520.000000 134400.000000 161280.000000 188160.000000 215040.000000 241920.000000 268800.000000 295680.000000 322560.000000 349440.000000 376320.000000 403200.000000 430080.000000 456960.000000 483840.000000 510720.000000 537600.000000 564480.000000 591360.000000 618240.000000 645120.000000 672000.000000 698880.000000 725760.000000 752640.000000 779520.000000 806400.000000 833280.000000 860160.000000 887040.000000 913920.000000 940800.000000
0.000000 27510.000000 55020.000000 82530.000000 110040.000000 137550.000000 165060.000000 192570.000000 220080.000000 247590.000000 275100.000000 302610.000000 330120.000000 357630.000000 385140.000000 412650.000000 440160.000000 467670.000000 495180.000000 522690.000000 550200.000000 577710.000000 605220.000000 632730.000000 660240.000000 687750.000000 715260.000000 742770.000000 770280.000000 797790.000000 825300.000000 852810.000000 880320.000000 907830.000000 935340.000000 962850.000000
0.000000 28140.000000 56280.000000 84420.000000 112560.000000 140700.000000 168840.000000 196980.000000 225120.000000 253260.000000 281400.000000 309540.000000 337680.000000 365820.000000 393960.000000 422100.000000 450240.000000 478380.000000 506520.000000 534660.000000 562800.000000 590940.000000 619080.000000 647220.000000 675360.000000 703500.000000 731640.000000 759780.000000 787920.000000 816060.000000 844200.000000 872340.000000 900480.000000 928620.000000 956760.000000 984900.000000
0.000000 28770.000000 57540.000000 86310.000000 115080.000000 143850.000000 172620.000000 201390.000000 230160.000000 258930.000000 287700.000000 316470.000000 345240.000000 374010.000000 402780.000000 431550.000000 460320.000000 489090.000000 517860.000000 546630.000000 575400.000000 604170.000000 632940.000000 661710.000000 690480.000000 719250.000000 748020.000000 776790.000000 805560.000000 834330.000000 863100.000000 891870.000000 920640.000000 949410.000000 978180.000000 1006950.000000
0.000000 29400.000000 58800.000000 88200.000000 117600.000000 147000.000000 176400.000000 205800.000000 235200.000000 264600.000000 294000.000000 323400.000000 352800.000000 382200.000000 411600.000000 441000.000000 470400.000000 499800.000000 529200.000000 558600.000000 588000.000000 617400.000000 646800.000000 676200.000000 705600.000000 735000.000000 764400.000000 793800.000000 823200.000000 852600.000000 882000.000000 911400.000000 940800.000000 970200.000000 999600.000000 1029000.000000
0.000000 30030.000000 60060.000000 90090.000000 120120.000000 150150.000000 180180.000000 210210.000000 240240.000000 270270.000000 300300.000000 330330.000000 360360.000000 390390.000000 420420.000000 450450.000000 480480.000000 510510.000000 540540.000000 570570.000000 600600.000000 630630.000000 660660.000000 690690.000000 720720.000000 750750.000000 780780.000000 810810.000000 840840.000000 870870.000000 900900.000000 930930.000000 960960.000000 990990.000000 1021020.000000 1051050.000000
0.000000 30660.000000 61320.000000 91980.000000 122640.000000 153300.000000 183960.000000 214620.000000 245280.000000 275940.000000 306600.000000 337260.000000 367920.000000 398580.000000 429240.000000 459900.000000 490560.000000 521220.000000 551880.000000 582540.000000 613200.000000 643860.000000 674520.000000 705180.000000 735840.000000 766500.000000 797160.000000 827820.000000 858480.000000 889140.000000 919800.000000 950460.000000 981120.000000 1011780.000000 1042440.000000 1073100.000000
0.000000 31290.000000 62580.000000 93870.000000 125160.000000 156450.000000 187740.000000 219030.000000 250320.000000 281610.000000 312900.000000 344190.000000 375480.000000 406770.000000 438060.000000 469350.000000 500640.000000 531930.000000 563220.000000 594510.000000 625800.000000 657090.000000 688380.000000 719670.000000 750960.000000 782250.000000 813540.000000 844830.000000 876120.000000 907410.000000 938700.000000 969990.000000 1001280.000000 1032570.000000 1063860.000000 1095150.000000
0.000000 31920.000000 63840.000000 95760.000000 127680.000000 159600.000000 191520.000000 223440.000000 255360.000000 287280.000000 319200.000000 351120.000000 383040.000000 414960.000000 446880.000000 478800.000000 510720.000000 542640.000000 574560.000000 606480.000000 638400.000000 670320.000000 702240.000000 734160.000000 766080.000000 798000.000000 829920.000000 861840.000000 893760.000000 925680.000000 957600.000000 989520.000000 1021440.000000 1053360.000000 1085280.000000 1117200.000000






0.000000 36330.000000 72660.000000 108990.000000 145320.000000 181650.000000 217980.000000 254310.000000 290640.000000 326970.000000 363300.000000 399630.000000 435960.000000 472290.000000 508620.000000 544950.000000 581280.000000 617610.000000 653940.000000 690270.000000 726600.000000 762930.000000 799260.000000 835590.000000 871920.000000 908250.000000 944580.000000 980910.000000 1017240.000000 1053570.000000 1089900.000000 1126230.000000 1162560.000000 1198890.000000 1235220.000000 1271550.000000


View File

@ -1,25 +0,0 @@
{
"name": "do.body => do.end45",
"context": "{ [] }",
"statements": [{
"name": "Stmt_do_body2",
"domain": "{ Stmt_do_body2[i0, i1, i2] : i0 >= 0 and i0 <= 35 and i1 >= 0 and i1 <= 35 and i2 >= 0 and i2 <= 35 }",
"schedule": "{ Stmt_do_body2[i0, i1, i2] -> scattering[0, o1, i0, o3, 0, o5, i1, o7, 0, o9, i2, o11, 0] : 4o7 = o5 and 4o11 = o9 and 4o3 = o1 and o1 <= i0 and o1 >= -3 + i0 and o5 <= i1 and o5 >= -3 + i1 and o9 <= i2 and o9 >= -3 + i2 }",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }"
},
{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_A[49i0 + i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_B[i1 + 49i2] }"
},
{
"kind": "write",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }"
}]
}]
}

View File

@ -1,25 +0,0 @@
{
"name": "do.body => do.end45",
"context": "{ [] }",
"statements": [{
"name": "Stmt_do_body2",
"domain": "{ Stmt_do_body2[i0, i1, i2] : i0 >= 0 and i0 <= 35 and i1 >= 0 and i1 <= 35 and i2 >= 0 and i2 <= 35 }",
"schedule": "{ Stmt_do_body2[i0, i1, i2] -> scattering[0, i0, 0, i1, 0, -i2, 0] }",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }"
},
{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_A[49i0 + i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_B[i1 + 49i2] }"
},
{
"kind": "write",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }"
}]
}]
}

View File

@ -1,25 +0,0 @@
{
"name": "do.body => do.end45",
"context": "{ [] }",
"statements": [{
"name": "Stmt_do_body2",
"domain": "{ Stmt_do_body2[i0, i1, i2] : i0 >= 0 and i0 <= 35 and i1 >= 0 and i1 <= 35 and i2 >= 0 and i2 <= 35 }",
"schedule": "{ Stmt_do_body2[i0, i1, i2] -> scattering[0, -i0, 0, -i1, 0, i2, 0] }",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }"
},
{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_A[49i0 + i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_B[i1 + 49i2] }"
},
{
"kind": "write",
"relation": "{ Stmt_do_body2[i0, i1, i2] -> MemRef_C[49i0 + i1] }"
}]
}]
}

View File

@ -1,177 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -analyze < %s | FileCheck %s
;#include <string.h>
;#define N 1024
;int A[N];
;int B[N];
;
;void loop_with_condition() {
; int i;
;
; __sync_synchronize();
; for (i = 0; i < N; i++) {
; if (i <= N / 2)
; A[i] = 1;
; else
; A[i] = 2;
; B[i] = 3;
; }
; __sync_synchronize();
;}
;
;int main () {
; int i;
;
; memset(A, 0, sizeof(int) * N);
; memset(B, 0, sizeof(int) * N);
;
; loop_with_condition();
;
; for (i = 0; i < N; i++)
; if (B[i] != 3)
; return 1;
;
; for (i = 0; i < N; i++)
; if (i <= N / 2 && A[i] != 1)
; return 1;
; else if (i > N / 2 && A[i] != 2)
; return 1;
; return 0;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
; The two 1024-element int arrays of the C listing above (N = 1024).
@A = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4]
@B = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4]
; Kernel under test: one loop whose induction variable runs from 0 and exits
; when it reaches 1024. Each iteration stores 1 to A[i] when i <= 512 and 2
; otherwise, then unconditionally stores 3 to B[i]. A seq_cst fence is placed
; before and after the loop.
define void @loop_with_condition() nounwind {
; <label>:0
fence seq_cst
br label %1
; <label>:1 ; preds = %7, %0
%indvar = phi i64 [ %indvar.next, %7 ], [ 0, %0 ] ; <i64> [#uses=5]
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2]
%scevgep1 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar ; <i32*> [#uses=1]
%i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=1]
%exitcond = icmp ne i64 %indvar, 1024 ; <i1> [#uses=1]
br i1 %exitcond, label %2, label %8
; <label>:2 ; preds = %1
; Select the value written to A[i]: block %4 (i <= 512) or block %5.
%3 = icmp sle i32 %i.0, 512 ; <i1> [#uses=1]
br i1 %3, label %4, label %5
; <label>:4 ; preds = %2
store i32 1, i32* %scevgep
br label %6
; <label>:5 ; preds = %2
store i32 2, i32* %scevgep
br label %6
; <label>:6 ; preds = %5, %4
; Join point of both branches: B[i] is written on every iteration.
store i32 3, i32* %scevgep1
br label %7
; <label>:7 ; preds = %6
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %1
; <label>:8 ; preds = %1
fence seq_cst
ret void
}
; Driver: zeroes A and B with memset (4096 bytes each), calls
; @loop_with_condition and then verifies the arrays in two loops over
; i = 0..1023. The first loop requires B[i] == 3; the second requires
; A[i] == 1 when i <= 512 and A[i] == 2 when i > 512.
; Returns 1 on the first mismatch and 0 when all checks pass (see the phi in
; block %28).
define i32 @main() nounwind {
; <label>:0
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @B to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @loop_with_condition()
br label %1
; <label>:1 ; preds = %8, %0
; First verification loop: every B[i] must be 3.
%indvar1 = phi i64 [ %indvar.next2, %8 ], [ 0, %0 ] ; <i64> [#uses=3]
%scevgep3 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar1 ; <i32*> [#uses=1]
%i.0 = trunc i64 %indvar1 to i32 ; <i32> [#uses=1]
%2 = icmp slt i32 %i.0, 1024 ; <i1> [#uses=1]
br i1 %2, label %3, label %9
; <label>:3 ; preds = %1
%4 = load i32* %scevgep3 ; <i32> [#uses=1]
%5 = icmp ne i32 %4, 3 ; <i1> [#uses=1]
br i1 %5, label %6, label %7
; <label>:6 ; preds = %3
br label %28
; <label>:7 ; preds = %3
br label %8
; <label>:8 ; preds = %7
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1]
br label %1
; <label>:9 ; preds = %1
br label %10
; <label>:10 ; preds = %26, %9
; Second verification loop: checks the value stored to A[i] on each side of
; the i <= 512 split.
%indvar = phi i64 [ %indvar.next, %26 ], [ 0, %9 ] ; <i64> [#uses=3]
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2]
%i.1 = trunc i64 %indvar to i32 ; <i32> [#uses=3]
%11 = icmp slt i32 %i.1, 1024 ; <i1> [#uses=1]
br i1 %11, label %12, label %27
; <label>:12 ; preds = %10
%13 = icmp sle i32 %i.1, 512 ; <i1> [#uses=1]
br i1 %13, label %14, label %18
; <label>:14 ; preds = %12
%15 = load i32* %scevgep ; <i32> [#uses=1]
%16 = icmp ne i32 %15, 1 ; <i1> [#uses=1]
br i1 %16, label %17, label %18
; <label>:17 ; preds = %14
br label %28
; <label>:18 ; preds = %14, %12
%19 = icmp sgt i32 %i.1, 512 ; <i1> [#uses=1]
br i1 %19, label %20, label %24
; <label>:20 ; preds = %18
%21 = load i32* %scevgep ; <i32> [#uses=1]
%22 = icmp ne i32 %21, 2 ; <i1> [#uses=1]
br i1 %22, label %23, label %24
; <label>:23 ; preds = %20
br label %28
; <label>:24 ; preds = %20, %18
br label %25
; <label>:25 ; preds = %24
br label %26
; <label>:26 ; preds = %25
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %10
; <label>:27 ; preds = %10
br label %28
; <label>:28 ; preds = %27, %23, %17, %6
; Exit code: 1 from any failure block (%6, %17, %23), 0 from normal loop exit.
%.0 = phi i32 [ 1, %6 ], [ 1, %17 ], [ 1, %23 ], [ 0, %27 ] ; <i32> [#uses=1]
ret i32 %.0
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; CHECK: for (c2=0;c2<=512;c2++) {
; CHECK: Stmt_4(c2);
; CHECK: Stmt_6(c2);
; CHECK: }
; CHECK: for (c2=513;c2<=1023;c2++) {
; CHECK: Stmt_5(c2);
; CHECK: Stmt_6(c2);
; CHECK: }

View File

@ -1,139 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -analyze < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x i32] zeroinitializer, align 16
@B = common global [1024 x i32] zeroinitializer, align 16
; Parametric kernel under test: one loop whose induction variable runs from 0
; and exits when it reaches 1024. The branch condition depends on the
; parameter %m: %sub is zext(0 - %m) + %indvar truncated to i32, and the
; iteration stores 1 to A[i] when %sub <= 1024 and 2 otherwise. B[i] is set
; to 3 on every iteration. A seq_cst fence is placed before and after the loop.
define void @loop_with_condition(i32 %m) nounwind {
entry:
fence seq_cst
%tmp = sub i32 0, %m
%tmp1 = zext i32 %tmp to i64
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
%arrayidx = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar
%arrayidx10 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar
%tmp2 = add i64 %tmp1, %indvar
%sub = trunc i64 %tmp2 to i32
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; Parameter-dependent split between if.then and if.else.
%cmp3 = icmp sle i32 %sub, 1024
br i1 %cmp3, label %if.then, label %if.else
if.then: ; preds = %for.body
store i32 1, i32* %arrayidx
br label %if.end
if.else: ; preds = %for.body
store i32 2, i32* %arrayidx
br label %if.end
if.end: ; preds = %if.else, %if.then
store i32 3, i32* %arrayidx10
br label %for.inc
for.inc: ; preds = %if.end
%indvar.next = add i64 %indvar, 1
br label %for.cond
for.end: ; preds = %for.cond
fence seq_cst
ret void
}
; Driver: zeroes A and B with memset (4096 bytes each), calls
; @loop_with_condition with %m = 5 and then verifies the arrays in two loops
; over i = 0..1023. The first loop requires B[i] == 3. In the second loop the
; branch conditions are the constants true/false, so A[i] == 1 is checked for
; every i and the A[i] == 2 check (land.lhs.true23) is never entered.
; Returns 1 on the first mismatch and 0 otherwise (see the phi in %return).
define i32 @main() nounwind {
entry:
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @B to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @loop_with_condition(i32 5)
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; First verification loop: every B[i] must be 3.
%indvar1 = phi i64 [ %indvar.next2, %for.inc ], [ 0, %entry ]
%arrayidx = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar1
%i.0 = trunc i64 %indvar1 to i32
%cmp = icmp slt i32 %i.0, 1024
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%tmp3 = load i32* %arrayidx
%cmp4 = icmp ne i32 %tmp3, 3
br i1 %cmp4, label %if.then, label %if.end
if.then: ; preds = %for.body
br label %return
if.end: ; preds = %for.body
br label %for.inc
for.inc: ; preds = %if.end
%indvar.next2 = add i64 %indvar1, 1
br label %for.cond
for.end: ; preds = %for.cond
br label %for.cond6
for.cond6: ; preds = %for.inc32, %for.end
; Second verification loop over A.
%indvar = phi i64 [ %indvar.next, %for.inc32 ], [ 0, %for.end ]
%arrayidx15 = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar
%i.1 = trunc i64 %indvar to i32
%cmp8 = icmp slt i32 %i.1, 1024
br i1 %cmp8, label %for.body9, label %for.end35
for.body9: ; preds = %for.cond6
; Constant-true condition: the A[i] == 1 check runs on every iteration.
br i1 true, label %land.lhs.true, label %if.else
land.lhs.true: ; preds = %for.body9
%tmp16 = load i32* %arrayidx15
%cmp17 = icmp ne i32 %tmp16, 1
br i1 %cmp17, label %if.then18, label %if.else
if.then18: ; preds = %land.lhs.true
br label %return
if.else: ; preds = %land.lhs.true, %for.body9
; Constant-false condition: the A[i] == 2 check below is never executed.
br i1 false, label %land.lhs.true23, label %if.end30
land.lhs.true23: ; preds = %if.else
%tmp27 = load i32* %arrayidx15
%cmp28 = icmp ne i32 %tmp27, 2
br i1 %cmp28, label %if.then29, label %if.end30
if.then29: ; preds = %land.lhs.true23
br label %return
if.end30: ; preds = %land.lhs.true23, %if.else
br label %if.end31
if.end31: ; preds = %if.end30
br label %for.inc32
for.inc32: ; preds = %if.end31
%indvar.next = add i64 %indvar, 1
br label %for.cond6
for.end35: ; preds = %for.cond6
br label %return
return: ; preds = %for.end35, %if.then29, %if.then18, %if.then
%retval.0 = phi i32 [ 1, %if.then ], [ 1, %if.then18 ], [ 1, %if.then29 ], [ 0, %for.end35 ]
ret i32 %retval.0
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; CHECK: for (c2=0;c2<=min(1023,m+1024);c2++) {
; CHECK: Stmt_if_then(c2);
; CHECK: Stmt_if_end(c2);
; CHECK: }
; CHECK: for (c2=max(0,m+1025);c2<=1023;c2++) {
; CHECK: Stmt_if_else(c2);
; CHECK: Stmt_if_end(c2);
; CHECK: }

View File

@ -1,179 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -analyze < %s | FileCheck %s
;#include <string.h>
;#define N 1024
;int A[N];
;int B[N];
;
;void loop_with_condition_ineq() {
; int i;
;
; __sync_synchronize();
; for (i = 0; i < N; i++) {
; if (i != N / 2)
; A[i] = 1;
; else
; A[i] = 2;
; B[i] = 3;
; }
; __sync_synchronize();
;}
;
;int main () {
; int i;
;
; memset(A, 0, sizeof(int) * N);
; memset(B, 0, sizeof(int) * N);
;
; loop_with_condition_ineq();
;
; for (i = 0; i < N; i++)
; if (B[i] != 3)
; return 1;
;
; for (i = 0; i < N; i++)
; if (i != N / 2 && A[i] != 1)
; return 1;
; else if (i == N && A[i] != 2)
; return 1;
; return 0;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
@A = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4]
@B = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4]
; Kernel under test: one loop whose induction variable runs from 0 and exits
; when it reaches 1024. The branch uses an inequality: each iteration stores 1
; to A[i] when i != 512 and 2 when i == 512, then unconditionally stores 3 to
; B[i]. A seq_cst fence is placed before and after the loop.
define void @loop_with_condition_ineq() nounwind {
; <label>:0
fence seq_cst
br label %1
; <label>:1 ; preds = %7, %0
%indvar = phi i64 [ %indvar.next, %7 ], [ 0, %0 ] ; <i64> [#uses=5]
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2]
%scevgep1 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar ; <i32*> [#uses=1]
%i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=1]
%exitcond = icmp ne i64 %indvar, 1024 ; <i1> [#uses=1]
br i1 %exitcond, label %2, label %8
; <label>:2 ; preds = %1
; "Not equal" condition: block %5 is taken only for the single value i == 512.
%3 = icmp ne i32 %i.0, 512 ; <i1> [#uses=1]
br i1 %3, label %4, label %5
; <label>:4 ; preds = %2
store i32 1, i32* %scevgep
br label %6
; <label>:5 ; preds = %2
store i32 2, i32* %scevgep
br label %6
; <label>:6 ; preds = %5, %4
store i32 3, i32* %scevgep1
br label %7
; <label>:7 ; preds = %6
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %1
; <label>:8 ; preds = %1
fence seq_cst
ret void
}
; Driver: zeroes A and B with memset (4096 bytes each), calls
; @loop_with_condition_ineq and then verifies the arrays in two loops over
; i = 0..1023. The first loop requires B[i] == 3; the second requires
; A[i] == 1 when i != 512.
; Returns 1 on the first mismatch and 0 otherwise (see the phi in block %28).
; NOTE(review): the A[i] == 2 check in block %20 is guarded by %i.1 == 1024,
; which cannot hold inside a loop guarded by %i.1 < 1024, so it is never
; executed. The guard was presumably meant to be 512 - confirm.
define i32 @main() nounwind {
; <label>:0
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @B to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @loop_with_condition_ineq()
br label %1
; <label>:1 ; preds = %8, %0
; First verification loop: every B[i] must be 3.
%indvar1 = phi i64 [ %indvar.next2, %8 ], [ 0, %0 ] ; <i64> [#uses=3]
%scevgep3 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar1 ; <i32*> [#uses=1]
%i.0 = trunc i64 %indvar1 to i32 ; <i32> [#uses=1]
%2 = icmp slt i32 %i.0, 1024 ; <i1> [#uses=1]
br i1 %2, label %3, label %9
; <label>:3 ; preds = %1
%4 = load i32* %scevgep3 ; <i32> [#uses=1]
%5 = icmp ne i32 %4, 3 ; <i1> [#uses=1]
br i1 %5, label %6, label %7
; <label>:6 ; preds = %3
br label %28
; <label>:7 ; preds = %3
br label %8
; <label>:8 ; preds = %7
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1]
br label %1
; <label>:9 ; preds = %1
br label %10
; <label>:10 ; preds = %26, %9
; Second verification loop over A.
%indvar = phi i64 [ %indvar.next, %26 ], [ 0, %9 ] ; <i64> [#uses=3]
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2]
%i.1 = trunc i64 %indvar to i32 ; <i32> [#uses=3]
%11 = icmp slt i32 %i.1, 1024 ; <i1> [#uses=1]
br i1 %11, label %12, label %27
; <label>:12 ; preds = %10
%13 = icmp ne i32 %i.1, 512 ; <i1> [#uses=1]
br i1 %13, label %14, label %18
; <label>:14 ; preds = %12
%15 = load i32* %scevgep ; <i32> [#uses=1]
%16 = icmp ne i32 %15, 1 ; <i1> [#uses=1]
br i1 %16, label %17, label %18
; <label>:17 ; preds = %14
br label %28
; <label>:18 ; preds = %14, %12
%19 = icmp eq i32 %i.1, 1024 ; <i1> [#uses=1]
br i1 %19, label %20, label %24
; <label>:20 ; preds = %18
%21 = load i32* %scevgep ; <i32> [#uses=1]
%22 = icmp ne i32 %21, 2 ; <i1> [#uses=1]
br i1 %22, label %23, label %24
; <label>:23 ; preds = %20
br label %28
; <label>:24 ; preds = %20, %18
br label %25
; <label>:25 ; preds = %24
br label %26
; <label>:26 ; preds = %25
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %10
; <label>:27 ; preds = %10
br label %28
; <label>:28 ; preds = %27, %23, %17, %6
%.0 = phi i32 [ 1, %6 ], [ 1, %17 ], [ 1, %23 ], [ 0, %27 ] ; <i32> [#uses=1]
ret i32 %.0
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; CHECK: for (c2=0;c2<=511;c2++) {
; CHECK: Stmt_4(c2);
; CHECK: Stmt_6(c2);
; CHECK: }
; CHECK: Stmt_5(512);
; CHECK: Stmt_6(512);
; CHECK: for (c2=513;c2<=1023;c2++) {
; CHECK: Stmt_4(c2);
; CHECK: Stmt_6(c2);
; CHECK: }

View File

@ -1,220 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -analyze < %s | FileCheck %s
;#include <string.h>
;#define N 1024
;int A[N];
;int B[N];
;
;void loop_with_condition() {
; int i;
;
; __sync_synchronize();
; for (i = 0; i < N; i++) {
; if (i <= N / 2) {
; if (i > 20)
; A[i] = 1;
; else
; A[i] = 2;
; }
; B[i] = 3;
; }
; __sync_synchronize();
;}
;
;int main () {
; int i;
;
; memset(A, 0, sizeof(int) * N);
; memset(B, 0, sizeof(int) * N);
;
; loop_with_condition();
;
; for (i = 0; i < N; i++)
; if (B[i] != 3)
; return 1;
;
; for (i = 0; i < N; i++)
; if (i <= N / 2 && i > 20 && A[i] != 1)
; return 1;
; else if (i > N / 2) {
; if (i <= 20 && A[i] != 2)
; return 1;
; if (i > 20 && A[i] != 0)
; return 1;
; }
; return 0;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
@A = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4]
@B = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=4]
; Kernel under test with nested conditions: one loop whose induction variable
; runs from 0 and exits when it reaches 1024. For i <= 512 the iteration
; stores 1 to A[i] when i > 20 and 2 otherwise; for i > 512 nothing is written
; to A. B[i] is set to 3 on every iteration. A seq_cst fence is placed before
; and after the loop.
define void @loop_with_condition() nounwind {
; <label>:0
fence seq_cst
br label %1
; <label>:1 ; preds = %10, %0
%indvar = phi i64 [ %indvar.next, %10 ], [ 0, %0 ] ; <i64> [#uses=5]
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=2]
%scevgep1 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar ; <i32*> [#uses=1]
%i.0 = trunc i64 %indvar to i32 ; <i32> [#uses=2]
%exitcond = icmp ne i64 %indvar, 1024 ; <i1> [#uses=1]
br i1 %exitcond, label %2, label %11
; <label>:2 ; preds = %1
; Outer condition: iterations with i > 512 skip straight to the B store (%9).
%3 = icmp sle i32 %i.0, 512 ; <i1> [#uses=1]
br i1 %3, label %4, label %9
; <label>:4 ; preds = %2
; Inner condition: selects which constant is written to A[i].
%5 = icmp sgt i32 %i.0, 20 ; <i1> [#uses=1]
br i1 %5, label %6, label %7
; <label>:6 ; preds = %4
store i32 1, i32* %scevgep
br label %8
; <label>:7 ; preds = %4
store i32 2, i32* %scevgep
br label %8
; <label>:8 ; preds = %7, %6
br label %9
; <label>:9 ; preds = %8, %2
store i32 3, i32* %scevgep1
br label %10
; <label>:10 ; preds = %9
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %1
; <label>:11 ; preds = %1
fence seq_cst
ret void
}
; Driver: zeroes A and B with memset (4096 bytes each), calls
; @loop_with_condition and then verifies the arrays in two loops over
; i = 0..1023. The first loop requires B[i] == 3. The second requires
; A[i] == 1 when 20 < i <= 512 and A[i] == 0 when i > 512.
; Returns 1 on the first mismatch and 0 otherwise (see the phi in block %39).
; NOTE(review): the A[i] == 2 check in block %24 is only reached when both
; i > 512 (block %20) and i <= 20 (block %22) hold, which is impossible, so
; the values written for i <= 20 are never verified - confirm intent.
define i32 @main() nounwind {
; <label>:0
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @B to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @loop_with_condition()
br label %1
; <label>:1 ; preds = %8, %0
; First verification loop: every B[i] must be 3.
%indvar1 = phi i64 [ %indvar.next2, %8 ], [ 0, %0 ] ; <i64> [#uses=3]
%scevgep3 = getelementptr [1024 x i32]* @B, i64 0, i64 %indvar1 ; <i32*> [#uses=1]
%i.0 = trunc i64 %indvar1 to i32 ; <i32> [#uses=1]
%2 = icmp slt i32 %i.0, 1024 ; <i1> [#uses=1]
br i1 %2, label %3, label %9
; <label>:3 ; preds = %1
%4 = load i32* %scevgep3 ; <i32> [#uses=1]
%5 = icmp ne i32 %4, 3 ; <i1> [#uses=1]
br i1 %5, label %6, label %7
; <label>:6 ; preds = %3
br label %39
; <label>:7 ; preds = %3
br label %8
; <label>:8 ; preds = %7
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1]
br label %1
; <label>:9 ; preds = %1
br label %10
; <label>:10 ; preds = %37, %9
; Second verification loop over A.
%indvar = phi i64 [ %indvar.next, %37 ], [ 0, %9 ] ; <i64> [#uses=3]
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar ; <i32*> [#uses=3]
%i.1 = trunc i64 %indvar to i32 ; <i32> [#uses=6]
%11 = icmp slt i32 %i.1, 1024 ; <i1> [#uses=1]
br i1 %11, label %12, label %38
; <label>:12 ; preds = %10
%13 = icmp sle i32 %i.1, 512 ; <i1> [#uses=1]
br i1 %13, label %14, label %20
; <label>:14 ; preds = %12
%15 = icmp sgt i32 %i.1, 20 ; <i1> [#uses=1]
br i1 %15, label %16, label %20
; <label>:16 ; preds = %14
%17 = load i32* %scevgep ; <i32> [#uses=1]
%18 = icmp ne i32 %17, 1 ; <i1> [#uses=1]
br i1 %18, label %19, label %20
; <label>:19 ; preds = %16
br label %39
; <label>:20 ; preds = %16, %14, %12
%21 = icmp sgt i32 %i.1, 512 ; <i1> [#uses=1]
br i1 %21, label %22, label %35
; <label>:22 ; preds = %20
%23 = icmp sle i32 %i.1, 20 ; <i1> [#uses=1]
br i1 %23, label %24, label %28
; <label>:24 ; preds = %22
%25 = load i32* %scevgep ; <i32> [#uses=1]
%26 = icmp ne i32 %25, 2 ; <i1> [#uses=1]
br i1 %26, label %27, label %28
; <label>:27 ; preds = %24
br label %39
; <label>:28 ; preds = %24, %22
%29 = icmp sgt i32 %i.1, 20 ; <i1> [#uses=1]
br i1 %29, label %30, label %34
; <label>:30 ; preds = %28
; Elements with i > 512 were not written by the kernel, so the memset value 0
; is expected here.
%31 = load i32* %scevgep ; <i32> [#uses=1]
%32 = icmp ne i32 %31, 0 ; <i1> [#uses=1]
br i1 %32, label %33, label %34
; <label>:33 ; preds = %30
br label %39
; <label>:34 ; preds = %30, %28
br label %35
; <label>:35 ; preds = %34, %20
br label %36
; <label>:36 ; preds = %35
br label %37
; <label>:37 ; preds = %36
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %10
; <label>:38 ; preds = %10
br label %39
; <label>:39 ; preds = %38, %33, %27, %19, %6
%.0 = phi i32 [ 1, %6 ], [ 1, %19 ], [ 1, %27 ], [ 1, %33 ], [ 0, %38 ] ; <i32> [#uses=1]
ret i32 %.0
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; CHECK: for (c2=0;c2<=20;c2++) {
; CHECK: Stmt_7(c2);
; CHECK: Stmt_9(c2);
; CHECK: }
; CHECK: for (c2=21;c2<=512;c2++) {
; CHECK: Stmt_6(c2);
; CHECK: Stmt_9(c2);
; CHECK: }
; CHECK: for (c2=513;c2<=1023;c2++) {
; CHECK: Stmt_9(c2);
; CHECK: }
; LOOPS: Loop at depth 1 containing: %polly.loop_header<header>,%polly.stmt.,%polly.stmt.3<latch><exiting>
; LOOPS: Loop at depth 1 containing: %polly.loop_header5<header>,%polly.stmt.11,%polly.stmt.12<latch><exiting>
; LOOPS: Loop at depth 1 containing: %polly.loop_header15<header>,%polly.stmt.21<latch><exiting>

View File

@ -1,138 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen %vector-opt -S -dce < %s | FileCheck %s
;#define M 1024
;#define N 1024
;#define K 1024
;float A[K][M];
;float B[N][K];
;float C[M][N];
;/*
;void matmul_vec(void) {
; int i, j, k;
;
;
; /* With much unrolling
; for (i=0;i<=M;i++)
; for (j=0;j<=N;j+=4)
; for (k=0;k<=K;k+=8)
; for (kk=k;kk<=k+7;kk++)
; for (jj=j;jj<=j+3;jj++)
; C[i][jj] += A[kk][i] * B[jj][kk];
; vec_load splat scalar_load
; */
; /* Without unrolling
; for (i=0;i<=M;i++)
; for (j=0;j<=N;j+=4)
; for (k=0;k<=K;k++)
; for (jj=j;jj<=j+3;jj++)
; C[i][jj] += A[k][i] * B[jj][k];
; vec_load splat scalar_load
; */
;
;}
;*/
;int main()
;{
; int i, j, k;
; //matmul_vec();
; for(i=0; i<M/4; i++)
; for(k=0; k<K; k++) {
; for(j=0; j<N; j++)
; C[i+0][j] += A[k][i+0] * B[j][k];
; C[i+1][j] += A[k][i+1] * B[j][k];
; C[i+2][j] += A[k][i+2] * B[j][k];
; C[i+3][j] += A[k][i+3] * B[j][k];
; }
;
; return A[42][42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x [1024 x float]] zeroinitializer, align 16
@B = common global [1024 x [1024 x float]] zeroinitializer, align 16
@C = common global [1024 x [1024 x float]] zeroinitializer, align 16
; Triple loop nest over the global float matrices A, B and C; each loop runs
; its induction variable from 0 and exits when it reaches 1024.
; With %indvar3 (outer), %indvar1 (middle) and %indvar (inner), the innermost
; body computes C[indvar3][indvar1] += A[indvar][indvar3] * B[indvar1][indvar].
define void @matmul_vec() nounwind {
; <label>:0
br label %1
; <label>:1 ; preds = %16, %0
%indvar3 = phi i64 [ %indvar.next4, %16 ], [ 0, %0 ]
%exitcond9 = icmp ne i64 %indvar3, 1024
br i1 %exitcond9, label %2, label %17
; <label>:2 ; preds = %1
br label %3
; <label>:3 ; preds = %14, %2
%indvar1 = phi i64 [ %indvar.next2, %14 ], [ 0, %2 ]
%scevgep8 = getelementptr [1024 x [1024 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1
%exitcond6 = icmp ne i64 %indvar1, 1024
br i1 %exitcond6, label %4, label %15
; <label>:4 ; preds = %3
br label %5
; <label>:5 ; preds = %12, %4
%indvar = phi i64 [ %indvar.next, %12 ], [ 0, %4 ]
%scevgep5 = getelementptr [1024 x [1024 x float]]* @A, i64 0, i64 %indvar, i64 %indvar3
%scevgep = getelementptr [1024 x [1024 x float]]* @B, i64 0, i64 %indvar1, i64 %indvar
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %6, label %13
; <label>:6 ; preds = %5
; Loop body: load the A and B elements, multiply, and accumulate into C.
%7 = load float* %scevgep5, align 4
%8 = load float* %scevgep, align 4
%9 = fmul float %7, %8
%10 = load float* %scevgep8, align 4
%11 = fadd float %10, %9
store float %11, float* %scevgep8, align 4
br label %12
; <label>:12 ; preds = %6
%indvar.next = add i64 %indvar, 1
br label %5
; <label>:13 ; preds = %5
br label %14
; <label>:14 ; preds = %13
%indvar.next2 = add i64 %indvar1, 1
br label %3
; <label>:15 ; preds = %3
br label %16
; <label>:16 ; preds = %15
%indvar.next4 = add i64 %indvar3, 1
br label %1
; <label>:17 ; preds = %1
ret void
}
; Driver: runs @matmul_vec, then returns A[42][42] converted from float to a
; signed i32.
define i32 @main() nounwind {
call void @matmul_vec()
%1 = load float* getelementptr inbounds ([1024 x [1024 x float]]* @A, i64 0, i64 42, i64 42), align 8
%2 = fptosi float %1 to i32
ret i32 %2
}
; CHECK: load <1 x float>*
; CHECK: shufflevector <1 x float>
; CHECK: load float*
; CHECK: insertelement <4 x float>
; CHECK: load float*
; CHECK: insertelement <4 x float>
; CHECK: load float*
; CHECK: insertelement <4 x float>
; CHECK: load float*
; CHECK: insertelement <4 x float>
; CHECK: fmul <4 x float>
; CHECK: bitcast float*
; CHECK: load <4 x float>*
; CHECK: fadd <4 x float>
; CHECK: bitcast float*
; CHECK: store <4 x float>

View File

@ -1,25 +0,0 @@
{
"name": "%1 => %17",
"context": "{ [] }",
"statements": [{
"name": "Stmt_6",
"domain": "{ Stmt_6[i0, i1, i2] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 and i2 >= 0 and i2 <= 1023 }",
"schedule": "{ Stmt_6[i0, i2, i1] -> scattering[i0, i1, o, i2] : exists (e0 = [(o)/4]: 4e0 = o and o <= i2 and o >= -3 + i2) }",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_6[i0, i1, i2] -> MemRef_A[i0 + 1024i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_6[i0, i1, i2] -> MemRef_B[1024i1 + i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_6[i0, i1, i2] -> MemRef_C[1024i0 + i1] }"
},
{
"kind": "write",
"relation": "{ Stmt_6[i0, i1, i2] -> MemRef_C[1024i0 + i1] }"
}]
}]
}

View File

@ -1,32 +0,0 @@
; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen < %s
; This test case checks that Polly does not break on a PHI guard statement.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Two nested loops communicating through the stack slot %acc.reg2mem.
; The outer loop (for.preheader .. for.end) runs until %indvar.next reaches 20
; and resets the slot to 0 at the start of each iteration. The inner loop
; (for.inc) carries its counter in a PHI node, increments it from 0 until it
; equals 20, and stores every intermediate value to the slot.
; Returns the value loaded from the slot after both loops have finished.
define i32 @phi_guard() {
entry:
%acc.reg2mem = alloca i32
br label %for.preheader
for.preheader: ; preds = %for.end, %entry
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.end ]
store i32 0, i32* %acc.reg2mem
br label %for.inc
for.inc: ; preds = %for.inc, %for.preheader
; Self-loop: %0 is 0 on entry from the preheader and %1 on the back edge.
%0 = phi i32 [ 0, %for.preheader ], [ %1, %for.inc ]
%1 = add nsw i32 %0, 1
store i32 %1, i32* %acc.reg2mem
%exitcond = icmp ne i32 %1, 20
br i1 %exitcond, label %for.inc, label %for.end
for.end: ; preds = %for.inc
%indvar.next = add i64 %indvar, 1
%exitcond4 = icmp ne i64 %indvar.next, 20
br i1 %exitcond4, label %for.preheader, label %for.end10
for.end10: ; preds = %for.end
%res = load i32* %acc.reg2mem, align 4
ret i32 %res
}

View File

@ -1,28 +0,0 @@
{
"context" : "{ : }",
"name" : "for.preheader => for.end10",
"statements" : [
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_for_preheader[i0] -> MemRef_acc_reg2mem[0] }"
}
],
"domain" : "{ Stmt_for_preheader[i0] : i0 >= 0 and i0 <= 19 }",
"name" : "Stmt_for_preheader",
"schedule" : "{ Stmt_for_preheader[i0] -> [o0, o1, i0, 19i0, 0] : exists (e0 = [(o1)/32], e1 = [(o0)/32]: 32e0 = o1 and 32e1 = o0 and o0 <= i0 and o0 >= -31 + i0 and o1 <= 19i0 and o1 >= -31 + 19i0 and i0 >= 0 and i0 <= 19) }"
},
{
"accesses" : [
{
"kind" : "write",
"relation" : "{ Stmt_for_inc[i0, i1] -> MemRef_acc_reg2mem[0] }"
}
],
"domain" : "{ Stmt_for_inc[i0, i1] : i0 >= 0 and i0 <= 19 and i1 >= 0 and i1 <= 19 }",
"name" : "Stmt_for_inc",
"schedule" : "{ Stmt_for_inc[i0, i1] -> [o0, o1, i0, 19i0 + i1, 1] : exists (e0 = [(o1)/32], e1 = [(o0)/32]: 32e0 = o1 and 32e1 = o0 and o0 <= i0 and o0 >= -31 + i0 and o1 <= 19i0 + i1 and o1 >= -31 + 19i0 + i1 and i0 >= 0 and i0 <= 19 and i1 >= 0 and i1 <= 19) }"
}
]
}

View File

@ -1,148 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -basicaa -polly-codegen < %s > /dev/null
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -analyze -S < %s | FileCheck -check-prefix=IMPORT %s
;#define M 2048
;#define N 2048
;#define K 2048
;#define alpha 1
;#define beta 1
;double A[M][K+13];
;double B[K][N+13];
;double C[M][N+13];
;
;void init_array();
;void print_array();
;
;void pluto_matmult(void) {
; int i, j, k;
;
; __sync_synchronize();
; for(i=0; i<M; i++)
; for(j=0; j<N; j++)
; for(k=0; k<K; k++)
; C[i][j] = beta*C[i][j] + alpha*A[i][k] * B[k][j];
; __sync_synchronize();
;}
;
;int main()
;{
; register double s;
;
; init_array();
;
;#pragma scop
; pluto_matmult();
;#pragma endscop
; print_array();
;
; return 0;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@C = common global [2048 x [2061 x double]] zeroinitializer, align 8 ; <[2048 x [2061 x double]]*> [#uses=2]
@A = common global [2048 x [2061 x double]] zeroinitializer, align 8 ; <[2048 x [2061 x double]]*> [#uses=2]
@B = common global [2048 x [2061 x double]] zeroinitializer, align 8 ; <[2048 x [2061 x double]]*> [#uses=2]
; Kernel under test: a triple loop nest over the global double matrices A, B
; and C; each loop runs its induction variable from 0 and exits when it
; reaches 2048. With %indvar3 = i (outer), %indvar1 = j (middle) and
; %indvar = k (inner), the innermost body computes
;   C[i][j] = 1.0 * C[i][j] + (1.0 * A[i][k]) * B[k][j].
; A seq_cst fence is placed before and after the loop nest.
define void @pluto_matmult() nounwind {
entry:
fence seq_cst
br label %for.cond
for.cond: ; preds = %for.inc44, %entry
%indvar3 = phi i64 [ %indvar.next4, %for.inc44 ], [ 0, %entry ] ; <i64> [#uses=4]
%exitcond6 = icmp ne i64 %indvar3, 2048 ; <i1> [#uses=1]
br i1 %exitcond6, label %for.body, label %for.end47
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc40, %for.body
%indvar1 = phi i64 [ %indvar.next2, %for.inc40 ], [ 0, %for.body ] ; <i64> [#uses=4]
%arrayidx12 = getelementptr [2048 x [2061 x double]]* @C, i64 0, i64 %indvar3, i64 %indvar1 ; <double*> [#uses=2]
%exitcond5 = icmp ne i64 %indvar1, 2048 ; <i1> [#uses=1]
br i1 %exitcond5, label %for.body4, label %for.end43
for.body4: ; preds = %for.cond1
br label %for.cond5
for.cond5: ; preds = %for.inc, %for.body4
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body4 ] ; <i64> [#uses=4]
%arrayidx20 = getelementptr [2048 x [2061 x double]]* @A, i64 0, i64 %indvar3, i64 %indvar ; <double*> [#uses=1]
%arrayidx29 = getelementptr [2048 x [2061 x double]]* @B, i64 0, i64 %indvar, i64 %indvar1 ; <double*> [#uses=1]
%exitcond = icmp ne i64 %indvar, 2048 ; <i1> [#uses=1]
br i1 %exitcond, label %for.body8, label %for.end
for.body8: ; preds = %for.cond5
; Innermost statement; the multiplications by 1.0 are kept explicit in the IR.
%tmp13 = load double* %arrayidx12 ; <double> [#uses=1]
%mul = fmul double 1.000000e+00, %tmp13 ; <double> [#uses=1]
%tmp21 = load double* %arrayidx20 ; <double> [#uses=1]
%mul22 = fmul double 1.000000e+00, %tmp21 ; <double> [#uses=1]
%tmp30 = load double* %arrayidx29 ; <double> [#uses=1]
%mul31 = fmul double %mul22, %tmp30 ; <double> [#uses=1]
%add = fadd double %mul, %mul31 ; <double> [#uses=1]
store double %add, double* %arrayidx12
br label %for.inc
for.inc: ; preds = %for.body8
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %for.cond5
for.end: ; preds = %for.cond5
br label %for.inc40
for.inc40: ; preds = %for.end
%indvar.next2 = add i64 %indvar1, 1 ; <i64> [#uses=1]
br label %for.cond1
for.end43: ; preds = %for.cond1
br label %for.inc44
for.inc44: ; preds = %for.end43
%indvar.next4 = add i64 %indvar3, 1 ; <i64> [#uses=1]
br label %for.cond
for.end47: ; preds = %for.cond
fence seq_cst
ret void
}
; Driver: calls the externally declared @init_array, runs @pluto_matmult,
; calls the externally declared @print_array, and returns 0.
define i32 @main() nounwind {
entry:
call void (...)* @init_array()
call void @pluto_matmult()
call void (...)* @print_array()
ret i32 0
}
declare void @init_array(...)
declare void @print_array(...)
; CHECK: for (c2=0;c2<=2047;c2++) {
; CHECK: for (c4=0;c4<=2047;c4++) {
; CHECK: for (c6=0;c6<=2047;c6++) {
; CHECK: Stmt_for_body8(c2,c4,c6);
; CHECK: }
; CHECK: }
; CHECK: }
; Do not match the complete CLooG output. Newer CLooG versions optimize more
; in this test case.
; IMPORT: for (c2=0;c2<=2047;c2+=64) {
; IMPORT: c3<=min(2047,c2+63);c3++) {
; IMPORT: for (c6=0;c6<=2047;c6+=64) {
; IMPORT: c7<=min(2047,c6+63);c7++) {
; IMPORT: for (c10=0;c10<=2047;c10+=64) {
; IMPORT: c11<=min(2047,c10+63);c11++)
; IMPORT: {
; IMPORT: Stmt_for_body8(c3,c7,c11);
; IMPORT: }
; IMPORT: }
; IMPORT: }
; IMPORT: }
; IMPORT: }
; IMPORT: }

View File

@ -1,25 +0,0 @@
{
"name": "for.cond => for.end47",
"context": "{ [] }",
"statements": [{
"name": "Stmt_for_body8",
"domain": "{ Stmt_for_body8[i0, i1, i2] : i0 >= 0 and i0 <= 2047 and i1 >= 0 and i1 <= 2047 and i2 >= 0 and i2 <= 2047 }",
"schedule": "{ Stmt_for_body8[i0, i1, i2] -> scattering[0, o1, i0, o3, 0, o5, i1, o7, 0, o9, i2, o11, 0] : 64o7 = o5 and 64o11 = o9 and 64o3 = o1 and o1 <= i0 and o1 >= -63 + i0 and o5 <= i1 and o5 >= -63 + i1 and o9 <= i2 and o9 >= -63 + i2 }",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[2061i0 + i1] }"
},
{
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[2061i0 + i2] }"
},
{
"kind": "read",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i1 + 2061i2] }"
},
{
"kind": "write",
"relation": "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[2061i0 + i1] }"
}]
}]
}

View File

@ -1,35 +0,0 @@
; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen -polly-vectorizer=polly < %s
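; void f(int a[]) {
; int i;
; for (i = 0; i < 10; ++i)
; a[i] = a[i+5];
; }
; In this test case we import a schedule that limits the iteration domain
; to 0 <= i < 4, which makes the loop parallel. Previously we crashed in such
; cases. This test checks that we instead vectorize the loop.
; NOTE(review): the run line above does not pipe the output to FileCheck, so
; the expected vector store at the end of this file is not actually verified.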
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
; Single-block loop over %indvar = 0..9 (exits when %indvar.next == 10).
; Each iteration loads a[indvar + 5] and stores it to a[indvar].
define void @reduced-domain-eliminates-dependences(i64* %a) {
entry:
br label %bb
bb:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb ]
%add = add i64 %indvar, 5
; Read address (a + indvar + 5) and write address (a + indvar).
%scevgep.load = getelementptr i64* %a, i64 %add
%scevgep.store = getelementptr i64* %a, i64 %indvar
%val = load i64* %scevgep.load
store i64 %val, i64* %scevgep.store, align 8
%indvar.next = add nsw i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, 10
br i1 %exitcond, label %return, label %bb
return:
ret void
}
; CHECK: store <4 x i64> %val_p_vec_full, <4 x i64>* %vector_ptr10

View File

@ -1,21 +0,0 @@
{
"context" : "{ : }",
"name" : "bb => return",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt_bb[i0] -> MemRef_a[5 + i0] }"
},
{
"kind" : "write",
"relation" : "{ Stmt_bb[i0] -> MemRef_a[i0] }"
}
],
"domain" : "{ Stmt_bb[i0] : i0 >= 0 and i0 <= 10 }",
"name" : "Stmt_bb",
"schedule" : "{ Stmt_bb[i0] -> scattering[0, i0, 0]: i0 < 4 }"
}
]
}

View File

@ -1,94 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-cloog -analyze < %s | FileCheck %s
;#include <string.h>
;#include <stdio.h>
;#define N 1021
;
;int main () {
; int i;
; int A[N];
; int RED[1];
;
; memset(A, 0, sizeof(int) * N);
;
; A[0] = 1;
; A[1] = 1;
; RED[0] = 0;
;
; for (i = 2; i < N; i++) {
; A[i] = A[i-1] + A[i-2];
; RED[0] += A[i-2];
; }
;
; if (RED[0] != 382399368)
; return 1;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; Fibonacci-style recurrence with a scalar reduction kept in memory.
; Zero-fills a 1021-element i32 array A, seeds A[0] = A[1] = 1 and RED[0] = 0,
; then for %indvar = 0 .. 1018 computes
;   A[%indvar + 2] = A[%indvar + 1] + A[%indvar]
;   RED[0]        += A[%indvar]
; Returns 1 when RED[0] differs from 382399368, otherwise 0.
define i32 @main() nounwind {
entry:
%A = alloca [1021 x i32], align 4 ; <[1021 x i32]*> [#uses=6]
%RED = alloca [1 x i32], align 4 ; <[1 x i32]*> [#uses=3]
%arraydecay = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1]
%conv = bitcast i32* %arraydecay to i8* ; <i8*> [#uses=1]
; 4084 bytes = 1021 elements * 4 bytes per i32, i.e. the whole of A.
call void @llvm.memset.p0i8.i64(i8* %conv, i8 0, i64 4084, i32 1, i1 false)
; A[0] = 1
%arraydecay1 = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1]
%arrayidx = getelementptr inbounds i32* %arraydecay1, i64 0 ; <i32*> [#uses=1]
store i32 1, i32* %arrayidx
; A[1] = 1
%arraydecay2 = getelementptr inbounds [1021 x i32]* %A, i32 0, i32 0 ; <i32*> [#uses=1]
%arrayidx3 = getelementptr inbounds i32* %arraydecay2, i64 1 ; <i32*> [#uses=1]
store i32 1, i32* %arrayidx3
; RED[0] = 0
%arraydecay4 = getelementptr inbounds [1 x i32]* %RED, i32 0, i32 0 ; <i32*> [#uses=1]
%arrayidx5 = getelementptr inbounds i32* %arraydecay4, i64 0 ; <i32*> [#uses=1]
store i32 0, i32* %arrayidx5
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop header: addresses of A[%indvar], A[%indvar + 2] and A[%indvar + 1] are
; formed here; the body runs while %indvar != 1019.
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] ; <i64> [#uses=5]
%arrayidx15 = getelementptr [1021 x i32]* %A, i64 0, i64 %indvar ; <i32*> [#uses=2]
%tmp = add i64 %indvar, 2 ; <i64> [#uses=1]
%arrayidx20 = getelementptr [1021 x i32]* %A, i64 0, i64 %tmp ; <i32*> [#uses=1]
%tmp1 = add i64 %indvar, 1 ; <i64> [#uses=1]
%arrayidx9 = getelementptr [1021 x i32]* %A, i64 0, i64 %tmp1 ; <i32*> [#uses=1]
%exitcond = icmp ne i64 %indvar, 1019 ; <i1> [#uses=1]
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[%indvar + 2] = A[%indvar + 1] + A[%indvar]
%tmp10 = load i32* %arrayidx9 ; <i32> [#uses=1]
%tmp16 = load i32* %arrayidx15 ; <i32> [#uses=1]
%add = add nsw i32 %tmp10, %tmp16 ; <i32> [#uses=1]
store i32 %add, i32* %arrayidx20
; RED[0] += A[%indvar]; the accumulator is loaded and stored every iteration.
%tmp26 = load i32* %arrayidx15 ; <i32> [#uses=1]
%arraydecay27 = getelementptr inbounds [1 x i32]* %RED, i32 0, i32 0 ; <i32*> [#uses=1]
%arrayidx28 = getelementptr inbounds i32* %arraydecay27, i64 0 ; <i32*> [#uses=2]
%tmp29 = load i32* %arrayidx28 ; <i32> [#uses=1]
%add30 = add nsw i32 %tmp29, %tmp26 ; <i32> [#uses=1]
store i32 %add30, i32* %arrayidx28
br label %for.inc
for.inc: ; preds = %for.body
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br label %for.cond
for.end: ; preds = %for.cond
; Compare the accumulated value against the expected constant.
%arraydecay32 = getelementptr inbounds [1 x i32]* %RED, i32 0, i32 0 ; <i32*> [#uses=1]
%arrayidx33 = getelementptr inbounds i32* %arraydecay32, i64 0 ; <i32*> [#uses=1]
%tmp34 = load i32* %arrayidx33 ; <i32> [#uses=1]
%cmp35 = icmp ne i32 %tmp34, 382399368 ; <i1> [#uses=1]
br i1 %cmp35, label %if.then, label %if.end
if.then: ; preds = %for.end
br label %if.end
if.end: ; preds = %if.then, %for.end
; 1 on mismatch (path through %if.then), 0 otherwise.
%retval.0 = phi i32 [ 1, %if.then ], [ 0, %for.end ] ; <i32> [#uses=1]
ret i32 %retval.0
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; CHECK: for (c2=0;c2<=1018;c2++) {
; CHECK: Stmt_for_body(c2);
; CHECK: }

View File

@ -1,138 +0,0 @@
; RUN: opt %loadPolly -polly-cloog -analyze < %s | FileCheck %s
;#include <string.h>
;#define N 1024
;
;int A[N];
;
;void sequential_loops() {
; int i;
; for (i = 0; i < N/2; i++) {
; A[i] = 1;
; }
; for (i = N/2 ; i < N; i++) {
; A[i] = 2;
; }
;}
;
;int main () {
; int i;
; memset(A, 0, sizeof(int) * N);
;
; sequential_loops();
;
; for (i = 0; i < N; i++) {
; if (A[i] != 1 && i < N/2)
; return 1;
; if (A[i] != 2 && i >= N/2)
; return 1;
; }
;
; return 0;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux-gnu"
@A = common global [1024 x i32] zeroinitializer, align 4 ; <[1024 x i32]*> [#uses=5]
; Two back-to-back loops over the global array @A:
;   loop 1 (%bb1..%bb3): A[i]       = 1 for i = 0 .. 511
;   loop 2 (%bb5..%bb7): A[i + 512] = 2 for i = 0 .. 511
define void @sequential_loops() nounwind {
bb:
br label %bb1
bb1: ; preds = %bb3, %bb
; First loop header; the body runs while %indvar1 != 512.
%indvar1 = phi i64 [ %indvar.next2, %bb3 ], [ 0, %bb ]
%scevgep4 = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar1
%exitcond3 = icmp ne i64 %indvar1, 512
br i1 %exitcond3, label %bb2, label %bb4
bb2: ; preds = %bb1
store i32 1, i32* %scevgep4
br label %bb3
bb3: ; preds = %bb2
%indvar.next2 = add i64 %indvar1, 1
br label %bb1
bb4: ; preds = %bb1
br label %bb5
bb5: ; preds = %bb7, %bb4
; Second loop header; the counter restarts at 0 and the array index is
; offset by 512 so the upper half of @A is written.
%indvar = phi i64 [ %indvar.next, %bb7 ], [ 0, %bb4 ]
%tmp = add i64 %indvar, 512
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %tmp
%exitcond = icmp ne i64 %indvar, 512
br i1 %exitcond, label %bb6, label %bb8
bb6: ; preds = %bb5
store i32 2, i32* %scevgep
br label %bb7
bb7: ; preds = %bb6
%indvar.next = add i64 %indvar, 1
br label %bb5
bb8: ; preds = %bb5
ret void
}
; Driver: zero-fills @A (4096 bytes), calls @sequential_loops, then scans
; A[0 .. 1023]. Returns 1 as soon as an element in the lower half (i < 512)
; is not 1 or an element in the upper half (i >= 512) is not 2; returns 0
; when the scan completes.
define i32 @main() nounwind {
bb:
call void @llvm.memset.p0i8.i64(i8* bitcast ([1024 x i32]* @A to i8*), i8 0, i64 4096, i32 1, i1 false)
call void @sequential_loops()
br label %bb1
bb1: ; preds = %bb15, %bb
; Scan loop header; continues while the truncated counter is below 1024.
%indvar = phi i64 [ %indvar.next, %bb15 ], [ 0, %bb ]
%i.0 = trunc i64 %indvar to i32
%scevgep = getelementptr [1024 x i32]* @A, i64 0, i64 %indvar
%tmp = icmp slt i32 %i.0, 1024
br i1 %tmp, label %bb2, label %bb16
bb2: ; preds = %bb1
; First condition: A[i] != 1 ...
%tmp3 = load i32* %scevgep
%tmp4 = icmp ne i32 %tmp3, 1
br i1 %tmp4, label %bb5, label %bb8
bb5: ; preds = %bb2
; ... and i < 512 -> failure.
%tmp6 = icmp slt i32 %i.0, 512
br i1 %tmp6, label %bb7, label %bb8
bb7: ; preds = %bb5
br label %bb17
bb8: ; preds = %bb5, %bb2
; Second condition: A[i] != 2 ...
%tmp9 = load i32* %scevgep
%tmp10 = icmp ne i32 %tmp9, 2
br i1 %tmp10, label %bb11, label %bb14
bb11: ; preds = %bb8
; ... and i >= 512 -> failure.
%tmp12 = icmp sge i32 %i.0, 512
br i1 %tmp12, label %bb13, label %bb14
bb13: ; preds = %bb11
br label %bb17
bb14: ; preds = %bb11, %bb8
br label %bb15
bb15: ; preds = %bb14
%indvar.next = add i64 %indvar, 1
br label %bb1
bb16: ; preds = %bb1
br label %bb17
bb17: ; preds = %bb16, %bb13, %bb7
; Exit code: 1 from either failure path, 0 when the loop ran to completion.
%.0 = phi i32 [ 1, %bb7 ], [ 1, %bb13 ], [ 0, %bb16 ]
ret i32 %.0
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; CHECK: for (c2=0;c2<=511;c2++) {
; CHECK: Stmt_bb2(c2);
; CHECK: }
; CHECK: for (c2=0;c2<=511;c2++) {
; CHECK: Stmt_bb6(c2);
; CHECK: }

View File

@ -1,59 +0,0 @@
; RUN: opt %loadPolly -polly-cloog -polly-allow-nonaffine -analyze < %s | FileCheck %s
;#include <stdio.h>
;#include <stdlib.h>
;#include <math.h>
;
;int main()
;{
; int A[1024*1024];
; int i;
; for (i = 0; i < 1024; i++)
; A[i*i] = 2*i;
;
; printf("Random Value: %d", A[rand() % 1024*1024]);
;
; return 0;
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7.2"
@.str = private unnamed_addr constant [17 x i8] c"Random Value: %d\00", align 1
; Loop with a non-affine store subscript: for i = 0 .. 1023 writes 2*i to
; A[i*i] in a 1048576-element stack array. Afterwards one element, selected
; via rand(), is loaded and printed; the function always returns 0.
define i32 @main() nounwind uwtable ssp {
entry:
%A = alloca [1048576 x i32], align 16
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
for.body: ; preds = %entry.split, %for.body
; %0 is the 32-bit loop counter; %mul1 = %0 * %0 is the quadratic subscript.
%0 = phi i32 [ 0, %entry.split ], [ %1, %for.body ]
%mul = mul i32 %0, 2
%mul1 = mul nsw i32 %0, %0
%idxprom1 = zext i32 %mul1 to i64
%arrayidx = getelementptr inbounds [1048576 x i32]* %A, i64 0, i64 %idxprom1
store i32 %mul, i32* %arrayidx, align 4
%1 = add nsw i32 %0, 1
%exitcond = icmp ne i32 %1, 1024
br i1 %exitcond, label %for.body, label %for.end
for.end: ; preds = %for.body
; Index = (rand() srem 1024) shifted left by 10, i.e. multiplied by 1024.
%call = call i32 @rand() nounwind
%rem = srem i32 %call, 1024
%mul2 = shl nsw i32 %rem, 10
%idxprom3 = sext i32 %mul2 to i64
%arrayidx4 = getelementptr inbounds [1048576 x i32]* %A, i64 0, i64 %idxprom3
%2 = load i32* %arrayidx4, align 16
%call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str, i64 0, i64 0), i32 %2) nounwind
ret i32 0
}
declare i32 @printf(i8*, ...)
declare i32 @rand()
; CHECK: for (c2=0;c2<=1023;c2++) {
; CHECK: Stmt_for_body(c2);
; CHECK: }

View File

@ -1,59 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -dce -S < %s | FileCheck %s
;#define N 1024
;float A[N];
;float B[N];
;
;void simple_vec_const(void) {
; int i;
;
; for (i = 0; i < 4; i++)
; B[i] = A[i] + 1;
;}
;int main()
;{
; simple_vec_const();
; return A[42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
; Four-iteration loop computing B[i] = A[i] + 1.0 for i = 0 .. 3 on the
; global float arrays @A and @B.
define void @simple_vec_const() nounwind {
bb:
br label %bb2
bb2: ; preds = %bb5, %bb
; Loop header; the body runs while %indvar != 4.
%indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ]
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
%scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 4
br i1 %exitcond, label %bb3, label %bb6
bb3: ; preds = %bb2
; The second fadd operand is a compile-time constant.
%tmp = load float* %scevgep1, align 4
%tmp4 = fadd float %tmp, 1.000000e+00
store float %tmp4, float* %scevgep, align 4
br label %bb5
bb5: ; preds = %bb3
%indvar.next = add i64 %indvar, 1
br label %bb2
bb6: ; preds = %bb2
ret void
}
; Driver: runs @simple_vec_const and returns A[42] converted to i32.
define i32 @main() nounwind {
bb:
call void @simple_vec_const()
%tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
%tmp1 = fptosi float %tmp to i32
ret i32 %tmp1
}
; CHECK: %tmp4p_vec = fadd <4 x float> %tmp_p_vec_full, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>

View File

@ -1,66 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -dce -S < %s | FileCheck %s
;#define N 1024
;float A[N];
;float B[N];
;
;void simple_vec_const(void) {
; int i;
;
; for (i = 0; i < 4; i++)
; B[i] = A[i] + i;
;}
;int main()
;{
; simple_vec_const();
; return A[42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
; Four-iteration loop computing B[i] = A[i] + (float)i for i = 0 .. 3.
; Unlike a constant addend, the second fadd operand here depends on the
; induction variable.
define void @simple_vec_const() nounwind {
bb:
br label %bb2
bb2: ; preds = %bb6, %bb
; Loop header; %i.0 is the 32-bit view of the counter used for the
; int-to-float conversion in the body.
%indvar = phi i64 [ %indvar.next, %bb6 ], [ 0, %bb ]
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
%i.0 = trunc i64 %indvar to i32
%scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 4
br i1 %exitcond, label %bb3, label %bb7
bb3: ; preds = %bb2
%tmp = load float* %scevgep1, align 4
%tmp4 = sitofp i32 %i.0 to float
%tmp5 = fadd float %tmp, %tmp4
store float %tmp5, float* %scevgep, align 4
br label %bb6
bb6: ; preds = %bb3
%indvar.next = add i64 %indvar, 1
br label %bb2
bb7: ; preds = %bb2
ret void
}
; Driver: runs @simple_vec_const and returns A[42] converted to i32.
define i32 @main() nounwind {
bb:
call void @simple_vec_const()
%tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
%tmp1 = fptosi float %tmp to i32
ret i32 %tmp1
}
; CHECK: insertelement <4 x float> undef, float %{{[^,]+}}, i32 0
; CHECK: insertelement <4 x float> %0, float %{{[^,]+}}, i32 1
; CHECK: insertelement <4 x float> %1, float %{{[^,]+}}, i32 2
; CHECK: insertelement <4 x float> %2, float %{{[^,]+}}, i32 3
; CHECK: fadd <4 x float> %tmp_p_vec_full, %3

View File

@ -1,44 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
declare float @foo(float) readnone
; Single-block loop with four iterations: each iteration loads A[0], passes
; it to the external function @foo and stores the float result to B[i].
define void @simple_vec_call() nounwind {
entry:
br label %body
body:
; Bottom-tested loop: the exit test is on %indvar_next == 4.
%indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
; The load address is loop-invariant (always element 0 of @A).
%value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
%result = tail call float @foo(float %value) nounwind
store float %result, float* %scevgep, align 4
%indvar_next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar_next, 4
br i1 %exitcond, label %return, label %body
return:
ret void
}
; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) [[NUW]]
; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0
; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1
; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2
; CHECK: %7 = insertelement <4 x float> %6, float [[RES4]], i32 3
; CHECK: store <4 x float> %7
; CHECK: attributes [[NUW]] = { nounwind }

View File

@ -1,43 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -dce -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float**] zeroinitializer, align 16
declare float** @foo(float) readnone
; Single-block loop with four iterations: each iteration loads A[0], passes
; it to the external function @foo and stores the returned float** to B[i].
; Here @foo returns a pointer type and @B is an array of float**.
define void @simple_vec_call() nounwind {
entry:
br label %body
body:
; Bottom-tested loop: the exit test is on %indvar_next == 4.
%indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
%scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
; The load address is loop-invariant (always element 0 of @A).
%value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
%result = tail call float** @foo(float %value) nounwind
store float** %result, float*** %scevgep, align 4
%indvar_next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar_next, 4
br i1 %exitcond, label %return, label %body
return:
ret void
}
; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) [[NUW:#[0-9]+]]
; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) [[NUW]]
; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) [[NUW]]
; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) [[NUW]]
; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0
; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1
; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2
; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3
; CHECK: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align
; CHECK: attributes [[NUW]] = { nounwind }

View File

@ -1,33 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -dce -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x double] zeroinitializer, align 16
; Four-iteration loop computing B[i] = (double)A[0] for i = 0 .. 3.
; @A holds floats and @B holds doubles, so each iteration performs an fpext.
define void @simple_vec_const() nounwind {
bb:
br label %bb1
bb1: ; preds = %bb3, %bb
; Loop header; the body runs while %indvar != 4.
%indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ]
%scevgep = getelementptr [1024 x double]* @B, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 4
br i1 %exitcond, label %bb2, label %bb4
bb2: ; preds = %bb1
; Loop-invariant load of A[0], widened from float to double before the store.
%tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
%tmp2 = fpext float %tmp to double
store double %tmp2, double* %scevgep, align 4
br label %bb3
bb3: ; preds = %bb2
%indvar.next = add i64 %indvar, 1
br label %bb1
bb4: ; preds = %bb1
ret void
}
; CHECK: fpext <4 x float> %tmp_p_splat to <4 x double>

View File

@ -1,58 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -S < %s | FileCheck %s
;#define N 1024
;float A[N];
;float B[N];
;
;void simple_vec_const(void) {
; int i;
;
; for (i = 0; i < 4; i++)
; B[i] = A[0];
;}
;int main()
;{
; simple_vec_const();
; return A[42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
; Four-iteration loop computing B[i] = A[0] for i = 0 .. 3. Basic blocks are
; unnamed and therefore referred to by their implicit numbers (%0, %1, ...).
define void @simple_vec_const() nounwind {
; <label>:0
br label %1
; <label>:1 ; preds = %4, %0
; Loop header; the body runs while %indvar != 4.
%indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 4
br i1 %exitcond, label %2, label %5
; <label>:2 ; preds = %1
; Loop-invariant load of A[0] stored to B[%indvar].
%3 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
store float %3, float* %scevgep, align 4
br label %4
; <label>:4 ; preds = %2
%indvar.next = add i64 %indvar, 1
br label %1
; <label>:5 ; preds = %1
ret void
}
; Driver: runs @simple_vec_const and returns A[42] converted to i32.
define i32 @main() nounwind {
call void @simple_vec_const()
%1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
%2 = fptosi float %1 to i32
ret i32 %2
}
; CHECK: load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*)
; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer

View File

@ -1,40 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -dce -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
; Fifteen-iteration copy loop: B[i] = A[i] for i = 0 .. 14. The trip count
; of 15 is not a power of two.
define void @simple_vec_large_width() nounwind {
; <label>:0
br label %1
; <label>:1 ; preds = %4, %0
; Loop header; the body runs while %indvar != 15.
%indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
%scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 15
br i1 %exitcond, label %2, label %5
; <label>:2 ; preds = %1
%3 = load float* %scevgep1, align 4
store float %3, float* %scevgep, align 4
br label %4
; <label>:4 ; preds = %2
%indvar.next = add i64 %indvar, 1
br label %1
; <label>:5 ; preds = %1
ret void
}
; Driver: runs @simple_vec_large_width and returns A[42] converted to i32.
define i32 @main() nounwind {
call void @simple_vec_large_width()
%1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
%2 = fptosi float %1 to i32
ret i32 %2
}
; CHECK: [[VEC1:%[a-zA-Z0-9_]+_full]] = load <15 x float>*
; CHECK: store <15 x float> [[VEC1]]

View File

@ -1,29 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float**] zeroinitializer, align 16
@B = common global [1024 x float**] zeroinitializer, align 16
declare float @foo(float) readnone
; Single-block loop with four iterations storing the pointer value A[0] to
; B[i]. Both arrays hold float** elements, so the copied value is a pointer.
; No call is made in this function.
define void @simple_vec_call() nounwind {
entry:
br label %body
body:
; Bottom-tested loop: the exit test is on %indvar_next == 4.
%indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
%scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
; Loop-invariant load of element 0 of @A.
%value = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0), align 16
store float** %value, float*** %scevgep, align 4
%indvar_next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar_next, 4
br i1 %exitcond, label %return, label %body
return:
ret void
}
; CHECK: %value_p_splat_one = load <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8

View File

@ -1,76 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -dce -S < %s | FileCheck %s
; RUN: opt %loadPolly -basicaa -polly-codegen -polly-vectorizer=unroll-only -S < %s | FileCheck -check-prefix=UNROLL %s
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -analyze < %s | FileCheck -check-prefix=IMPORT %s
; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen < %s -S %vector-opt | FileCheck -check-prefix=CODEGEN %s
;#define N 1024
;float A[N];
;float B[N];
;
;void simple_vec_stride_one(void) {
; int i;
;
; for (i = 0; i < 4; i++)
; B[i] = A[i];
;}
;int main()
;{
; simple_vec_stride_one();
; return A[42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
; Four-iteration copy loop with unit stride: B[i] = A[i] for i = 0 .. 3.
define void @simple_vec_stride_one() nounwind {
; <label>:0
br label %1
; <label>:1 ; preds = %4, %0
; Loop header; the body runs while %indvar != 4. Load and store addresses
; both advance by one element per iteration.
%indvar = phi i64 [ %indvar.next, %4 ], [ 0, %0 ]
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
%scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 4
br i1 %exitcond, label %2, label %5
; <label>:2 ; preds = %1
%3 = load float* %scevgep1, align 4
store float %3, float* %scevgep, align 4
br label %4
; <label>:4 ; preds = %2
%indvar.next = add i64 %indvar, 1
br label %1
; <label>:5 ; preds = %1
ret void
}
; Driver: runs @simple_vec_stride_one and returns A[42] converted to i32.
define i32 @main() nounwind {
call void @simple_vec_stride_one()
%1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
%2 = fptosi float %1 to i32
ret i32 %2
}
; CHECK: [[LOAD1:%[a-zA-Z0-9_]+]] = load <4 x float>*
; CHECK: store <4 x float> [[LOAD1]]
; IMPORT: for (c2=0;c2<=12;c2+=4) {
; IMPORT: Stmt_2({{[(]?}}c2/4{{[)]?}});
; IMPORT: }
; We do not generate optimal loads for this.
; CODEGEN: <4 x float>
; UNROLL: [[LOAD1:%[a-zA-Z0-9_]+_scalar.*]] = load float*
; UNROLL: [[LOAD2:%[a-zA-Z0-9_]+_scalar.*]] = load float*
; UNROLL: [[LOAD3:%[a-zA-Z0-9_]+_scalar.*]] = load float*
; UNROLL: [[LOAD4:%[a-zA-Z0-9_]+_scalar.*]] = load float*
; UNROLL: store float [[LOAD1]]
; UNROLL: store float [[LOAD2]]
; UNROLL: store float [[LOAD3]]
; UNROLL: store float [[LOAD4]]

View File

@ -1,17 +0,0 @@
{
"name": "%1 => %5",
"context": "{ [] }",
"statements": [{
"name": "Stmt_2",
"domain": "{ Stmt_2[i0] : i0 >= 0 and i0 <= 3 }",
"schedule": "{ Stmt_2[i0] -> scattering[0, 4i0, 0] }",
"accesses": [{
"kind": "read",
"relation": "{ Stmt_2[i0] -> MemRef_A[i0] }"
},
{
"kind": "write",
"relation": "{ Stmt_2[i0] -> MemRef_B[i0] }"
}]
}]
}

View File

@ -1,73 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -dce -S < %s | FileCheck %s
;#define N 1024
;float A[N];
;float B[N];
;
;void simple_vec_stride_x(void) {
; int i;
;
; for (i = 0; i < 4; i++)
; B[2 * i] = A[2 * i];
;}
;int main()
;{
; simple_vec_stride_x();
; return A[42];
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
; Four-iteration copy loop with stride two: B[2*i] = A[2*i] for i = 0 .. 3.
define void @simple_vec_stride_x() nounwind {
bb:
br label %bb2
bb2: ; preds = %bb5, %bb
; Loop header; %tmp = 2 * %indvar is the element index used for both arrays,
; so consecutive iterations touch every other element.
%indvar = phi i64 [ %indvar.next, %bb5 ], [ 0, %bb ]
%tmp = mul i64 %indvar, 2
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %tmp
%scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %tmp
%exitcond = icmp ne i64 %indvar, 4
br i1 %exitcond, label %bb3, label %bb6
bb3: ; preds = %bb2
%tmp4 = load float* %scevgep1, align 8
store float %tmp4, float* %scevgep, align 8
br label %bb5
bb5: ; preds = %bb3
%indvar.next = add i64 %indvar, 1
br label %bb2
bb6: ; preds = %bb2
ret void
}
; Driver: runs @simple_vec_stride_x and returns A[42] converted to i32.
define i32 @main() nounwind {
bb:
call void @simple_vec_stride_x()
%tmp = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
%tmp1 = fptosi float %tmp to i32
ret i32 %tmp1
}
; CHECK: [[LOAD1:%[a-zA-Z0-9_]+_scalar_]] = load float*
; CHECK: [[VEC1:%[a-zA-Z0-9_]+]] = insertelement <4 x float> undef, float [[LOAD1]], i32 0
; CHECK: [[LOAD2:%[a-zA-Z0-9_]+]] = load float*
; CHECK: [[VEC2:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC1]], float [[LOAD2]], i32 1
; CHECK: [[LOAD3:%[a-zA-Z0-9_]+]] = load float*
; CHECK: [[VEC3:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC2]], float [[LOAD3]], i32 2
; CHECK: [[LOAD4:%[a-zA-Z0-9_]+]] = load float*
; CHECK: [[VEC4:%[a-zA-Z0-9_]+]] = insertelement <4 x float> [[VEC3]], float [[LOAD4]], i32 3
; CHECK: [[EL1:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 0
; CHECK: store float [[EL1]]
; CHECK: [[EL2:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 1
; CHECK: store float [[EL2]]
; CHECK: [[EL3:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 2
; CHECK: store float [[EL3]]
; CHECK: [[EL4:%[a-zA-Z0-9_]+]] = extractelement <4 x float> [[VEC4]], i32 3
; CHECK: store float [[EL4]]

View File

@ -1,50 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-codegen %vector-opt -dce -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
@C = common global [1024 x float] zeroinitializer, align 16
; Four-iteration loop whose body is split across two basic blocks:
;   %bb2a: B[i] = A[i]
;   %bb2b: C[i] = A[i]
; for i = 0 .. 3. A[i] is loaded separately in each block.
define void @simple_vec_stride_one() nounwind {
bb0:
br label %bb1
bb1:
; Loop header; the body runs while %indvar != 4. Addresses for all three
; arrays are formed here.
%indvar = phi i64 [ %indvar.next, %bb4 ], [ 0, %bb0 ]
%scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
%scevgep2 = getelementptr [1024 x float]* @C, i64 0, i64 %indvar
%scevgep1 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar
%exitcond = icmp ne i64 %indvar, 4
br i1 %exitcond, label %bb2a, label %bb5
bb2a:
; First statement: copy A[i] to B[i].
%tmp1 = load float* %scevgep1, align 4
store float %tmp1, float* %scevgep, align 4
br label %bb2b
bb2b:
; Second statement: copy A[i] to C[i].
%tmp2 = load float* %scevgep1, align 4
store float %tmp2, float* %scevgep2, align 4
br label %bb4
bb4:
%indvar.next = add i64 %indvar, 1
br label %bb1
bb5:
ret void
}
; Driver: runs @simple_vec_stride_one and returns A[42] converted to i32.
define i32 @main() nounwind {
call void @simple_vec_stride_one()
%1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 42), align 8
%2 = fptosi float %1 to i32
ret i32 %2
}
; CHECK: [[LOAD1:%[a-zA-Z0-9_]+_full]] = load <4 x float>*
; CHECK: store <4 x float> [[LOAD1]]
; CHECK: [[LOAD2:%[a-zA-Z0-9_]+_full]] = load <4 x float>*
; CHECK: store <4 x float> [[LOAD2]]

Some files were not shown because too many files have changed in this diff Show More