Remove independent blocks pass

Polly can now be used as an analysis-only tool as long as code
  generation is disabled. However, we do not yet have a replacement for
  the independent blocks pass, though in the relevant cases this does
  not seem to impact performance much. Nevertheless, a virtual
  alternative that allows the same transformations without changing the
  input region will follow shortly.
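
  As a rough sketch of such an analysis-only use (the module path and
  input file below are placeholders), the scop analyses can be run
  through opt without triggering code generation:

    opt -load LLVMPolly.so -basicaa -polly-scops -analyze input.ll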

llvm-svn: 250652
Johannes Doerfert 2015-10-18 12:28:00 +00:00
parent c2c154e078
commit 01978cfa0c
18 changed files with 18 additions and 1010 deletions

View File

@@ -33,7 +33,6 @@ llvm::Pass *createDOTOnlyPrinterPass();
llvm::Pass *createDOTOnlyViewerPass();
llvm::Pass *createDOTPrinterPass();
llvm::Pass *createDOTViewerPass();
-llvm::Pass *createIndependentBlocksPass();
llvm::Pass *createJSONExporterPass();
llvm::Pass *createJSONImporterPass();
llvm::Pass *createPollyCanonicalizePass();
@@ -43,7 +42,6 @@ llvm::Pass *createIslAstInfoPass();
llvm::Pass *createCodeGenerationPass();
llvm::Pass *createIslScheduleOptimizerPass();
-extern char &IndependentBlocksID;
extern char &CodePreparationID;
}
@@ -64,7 +62,6 @@ struct PollyForcePassLinking {
polly::createDOTOnlyViewerPass();
polly::createDOTPrinterPass();
polly::createDOTViewerPass();
-polly::createIndependentBlocksPass();
polly::createJSONExporterPass();
polly::createJSONImporterPass();
polly::createScopDetectionPass();
@@ -81,7 +78,6 @@ namespace llvm {
class PassRegistry;
void initializeCodePreparationPass(llvm::PassRegistry &);
void initializeDeadCodeElimPass(llvm::PassRegistry &);
-void initializeIndependentBlocksPass(llvm::PassRegistry &);
void initializeJSONExporterPass(llvm::PassRegistry &);
void initializeJSONImporterPass(llvm::PassRegistry &);
void initializeIslAstInfoPass(llvm::PassRegistry &);

View File

@@ -648,10 +648,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
// Check that the base address of the access is invariant in the current
// region.
if (!isInvariant(*BaseValue, CurRegion))
-// Verification of this property is difficult as the independent blocks
-// pass may introduce aliasing that we did not have when running the
-// scop detection.
-return invalid<ReportVariantBasePtr>(Context, /*Assert=*/false, BaseValue,
+return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BaseValue,
&Inst);
AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer);
@@ -684,8 +681,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
AccessFunction, &Inst, BaseValue);
}
-// FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions
-// created by IndependentBlocks Pass.
+// FIXME: Think about allowing IntToPtrInst
if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BaseValue))
return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
@@ -699,13 +695,6 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
AliasSet &AS = Context.AST.getAliasSetForPointer(
BaseValue, MemoryLocation::UnknownSize, AATags);
-// INVALID triggers an assertion in verifying mode, if it detects that a
-// SCoP was detected by SCoP detection and that this SCoP was invalidated by
-// a pass that stated it would preserve the SCoPs. We disable this check as
-// the independent blocks pass may create memory references which seem to
-// alias, if -basicaa is not available. They actually do not, but as we can
-// not proof this without -basicaa we would fail. We disable this check to
-// not cause irrelevant verification failures.
if (!AS.isMustAlias()) {
if (PollyUseRuntimeAliasChecks) {
bool CanBuildRunTimeCheck = true;
@@ -731,7 +720,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
if (CanBuildRunTimeCheck)
return true;
}
-return invalid<ReportAlias>(Context, /*Assert=*/false, &Inst, AS);
+return invalid<ReportAlias>(Context, /*Assert=*/true, &Inst, AS);
}
return true;

View File

@@ -3651,7 +3651,6 @@ ScopInfo::~ScopInfo() {
}
void ScopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-AU.addRequiredID(IndependentBlocksID);
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<RegionInfoPass>();
AU.addRequired<DominatorTreeWrapperPass>();

View File

@@ -49,7 +49,6 @@ add_polly_library(Polly
Transform/Canonicalization.cpp
Transform/CodePreparation.cpp
Transform/DeadCodeElimination.cpp
-Transform/IndependentBlocks.cpp
Transform/ScheduleOptimizer.cpp
${POLLY_HEADER_FILES}
)

View File

@@ -192,7 +192,6 @@ public:
// region tree.
AU.addPreserved<RegionInfoPass>();
AU.addPreserved<ScopInfo>();
-AU.addPreservedID(IndependentBlocksID);
}
};
}

View File

@@ -135,7 +135,6 @@ SOURCES= Polly.cpp \
Transform/Canonicalization.cpp \
Transform/CodePreparation.cpp \
Transform/DeadCodeElimination.cpp \
-Transform/IndependentBlocks.cpp \
Transform/ScheduleOptimizer.cpp \
${GPGPU_FILES} \
${ISL_CODEGEN_FILES} \

View File

@@ -146,7 +146,6 @@ void initializePollyPasses(PassRegistry &Registry) {
initializeCodePreparationPass(Registry);
initializeDeadCodeElimPass(Registry);
initializeDependenceInfoPass(Registry);
-initializeIndependentBlocksPass(Registry);
initializeJSONExporterPass(Registry);
initializeJSONImporterPass(Registry);
initializeIslAstInfoPass(Registry);

View File

@@ -1,373 +0,0 @@
//===------ IndependentBlocks.cpp - Create Independent Blocks in Regions --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create independent blocks in the regions detected by ScopDetection.
//
//===----------------------------------------------------------------------===//
//
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/Local.h"
#include <vector>
using namespace polly;
using namespace llvm;
#define DEBUG_TYPE "polly-independent"
namespace {
struct IndependentBlocks : public FunctionPass {
RegionInfo *RI;
ScalarEvolution *SE;
ScopDetection *SD;
LoopInfo *LI;
BasicBlock *AllocaBlock;
static char ID;
IndependentBlocks() : FunctionPass(ID) {}
// Create new code for every instruction operator that can be expressed by a
// SCEV. This leaves just two kinds of instructions:
//
// 1. Instructions that only reference loop ivs or parameters outside the
// region.
//
// 2. Instructions that are not used for any memory modification. (These
// will be ignored later on.)
//
// Blocks containing only these kinds of instructions are called independent
// blocks as they can be scheduled arbitrarily.
bool createIndependentBlocks(BasicBlock *BB, const Region *R);
bool createIndependentBlocks(const Region *R);
// Run dead code elimination on the Scop to remove the scalar dependences
// that come with trivially dead instructions.
bool eliminateDeadCode(const Region *R);
//===--------------------------------------------------------------------===//
/// Non-trivial scalar dependence checking functions.
/// Non-trivial scalar dependences occur when the def and use are located in
/// different BBs and we cannot move them into the same one. This will
/// prevent us from scheduling BBs arbitrarily.
///
/// @brief This function checks if a scalar value that is part of the
/// Scop is used outside of the Scop.
///
/// @param Use The use of the instruction.
/// @param R The maximum region in the Scop.
///
/// @return Return true if the use of an instruction and the instruction
/// itself form a non-trivial scalar dependence.
static bool isEscapeUse(const Value *Use, const Region *R);
//===--------------------------------------------------------------------===//
/// Operand tree moving functions.
/// Trivial scalar dependences can be eliminated by moving the def into the
/// same BB that contains the use.
///
/// @brief Check if the instruction can be moved to another place safely.
///
/// @param Inst The instruction.
///
/// @return Return true if the instruction can be moved safely, false
/// otherwise.
static bool isSafeToMove(Instruction *Inst);
typedef std::map<Instruction *, Instruction *> ReplacedMapType;
/// @brief Move all safely movable instructions in the operand tree (DAG) to
/// eliminate trivial scalar dependences.
///
/// @param Inst The root of the operand tree.
/// @param R The maximum region in the Scop.
/// @param ReplacedMap The map from original instructions to their moved
/// counterparts.
/// @param InsertPos The insert position of the moved instructions.
void moveOperandTree(Instruction *Inst, const Region *R,
ReplacedMapType &ReplacedMap, Instruction *InsertPos);
bool isIndependentBlock(const Region *R, BasicBlock *BB) const;
bool areAllBlocksIndependent(const Region *R) const;
bool runOnFunction(Function &F);
void verifyAnalysis() const;
void verifyScop(const Region *R) const;
void getAnalysisUsage(AnalysisUsage &AU) const;
};
}
bool IndependentBlocks::isSafeToMove(Instruction *Inst) {
if (Inst->mayReadFromMemory() || Inst->mayWriteToMemory())
return false;
return isSafeToSpeculativelyExecute(Inst);
}
void IndependentBlocks::moveOperandTree(Instruction *Inst, const Region *R,
ReplacedMapType &ReplacedMap,
Instruction *InsertPos) {
BasicBlock *CurBB = Inst->getParent();
// Depth-first traverse the operand tree (or rather operand DAG, because we
// stop at PHINodes, so there are no cycles).
typedef Instruction::op_iterator ChildIt;
std::vector<std::pair<Instruction *, ChildIt>> WorkStack;
WorkStack.push_back(std::make_pair(Inst, Inst->op_begin()));
DenseSet<Instruction *> VisitedSet;
while (!WorkStack.empty()) {
Instruction *CurInst = WorkStack.back().first;
ChildIt It = WorkStack.back().second;
DEBUG(dbgs() << "Checking Operand of Node:\n" << *CurInst << "\n------>\n");
if (It == CurInst->op_end()) {
// Insert the new instructions in topological order.
if (!CurInst->getParent()) {
CurInst->insertBefore(InsertPos);
SE->forgetValue(CurInst);
}
WorkStack.pop_back();
} else {
// for each node N,
Instruction *Operand = dyn_cast<Instruction>(*It);
++WorkStack.back().second;
// Values that are not instructions cannot be moved.
if (Operand == 0)
continue;
DEBUG(dbgs() << "For Operand:\n" << *Operand << "\n--->");
// If the Scop region does not contain N, skip it and all its operands and
// continue, because we reached a "parameter".
// FIXME: We must keep the predicate instruction inside the Scop, otherwise
// it will be translated to a load instruction, and we cannot handle loads
// as affine predicates at the moment.
if (!R->contains(Operand) && !isa<TerminatorInst>(CurInst)) {
DEBUG(dbgs() << "Out of region.\n");
continue;
}
if (canSynthesize(Operand, LI, SE, R)) {
DEBUG(dbgs() << "is IV.\n");
continue;
}
// We cannot move the operand; a non-trivial scalar dependence was found!
if (!isSafeToMove(Operand)) {
DEBUG(dbgs() << "Can not move!\n");
continue;
}
// There is no need to move the instruction if it is contained in the same
// BB as the root instruction.
if (Operand->getParent() == CurBB) {
DEBUG(dbgs() << "No need to move.\n");
// Try to move its operands, but do not visit an instruction twice.
if (VisitedSet.insert(Operand).second)
WorkStack.push_back(std::make_pair(Operand, Operand->op_begin()));
continue;
}
// Now we need to move Operand to CurBB.
// Check if we already moved it.
ReplacedMapType::iterator At = ReplacedMap.find(Operand);
if (At != ReplacedMap.end()) {
DEBUG(dbgs() << "Moved.\n");
Instruction *MovedOp = At->second;
It->set(MovedOp);
SE->forgetValue(MovedOp);
} else {
// Note that NewOp is not inserted in any BB now; we will insert it when it
// is popped from the work stack, so it will be inserted in topological
// order.
Instruction *NewOp = Operand->clone();
NewOp->setName(Operand->getName() + ".moved.to." + CurBB->getName());
DEBUG(dbgs() << "Move to " << *NewOp << "\n");
It->set(NewOp);
ReplacedMap.insert(std::make_pair(Operand, NewOp));
SE->forgetValue(Operand);
// Process its operands, but do not visit an instruction twice.
if (VisitedSet.insert(NewOp).second)
WorkStack.push_back(std::make_pair(NewOp, NewOp->op_begin()));
}
}
}
SE->forgetValue(Inst);
}
bool IndependentBlocks::createIndependentBlocks(BasicBlock *BB,
const Region *R) {
std::vector<Instruction *> WorkList;
for (Instruction &Inst : *BB)
if (!isSafeToMove(&Inst) && !canSynthesize(&Inst, LI, SE, R))
WorkList.push_back(&Inst);
ReplacedMapType ReplacedMap;
Instruction *InsertPos = BB->getFirstNonPHIOrDbg();
for (Instruction *Inst : WorkList)
if (!isa<PHINode>(Inst))
moveOperandTree(Inst, R, ReplacedMap, InsertPos);
// The BB was changed if we replaced any operand.
return !ReplacedMap.empty();
}
bool IndependentBlocks::createIndependentBlocks(const Region *R) {
bool Changed = false;
for (BasicBlock *BB : R->blocks())
Changed |= createIndependentBlocks(BB, R);
return Changed;
}
bool IndependentBlocks::eliminateDeadCode(const Region *R) {
std::vector<Instruction *> WorkList;
// Find all trivially dead instructions.
for (BasicBlock *BB : R->blocks())
for (Instruction &Inst : *BB)
if (!isIgnoredIntrinsic(&Inst) && isInstructionTriviallyDead(&Inst))
WorkList.push_back(&Inst);
if (WorkList.empty())
return false;
// Delete them so that the cross-BB scalar dependences that come with them
// are also eliminated.
while (!WorkList.empty()) {
RecursivelyDeleteTriviallyDeadInstructions(WorkList.back());
WorkList.pop_back();
}
return true;
}
bool IndependentBlocks::isEscapeUse(const Value *Use, const Region *R) {
// Non-instruction users will never escape.
if (!isa<Instruction>(Use))
return false;
return !R->contains(cast<Instruction>(Use));
}
bool IndependentBlocks::isIndependentBlock(const Region *R,
BasicBlock *BB) const {
for (Instruction &Inst : *BB) {
if (canSynthesize(&Inst, LI, SE, R))
continue;
if (isIgnoredIntrinsic(&Inst))
continue;
// A value inside the Scop is referenced outside.
for (User *U : Inst.users()) {
if (isEscapeUse(U, R)) {
DEBUG(dbgs() << "Instruction not independent:\n");
DEBUG(dbgs() << "Instruction used outside the Scop!\n");
DEBUG(Inst.print(dbgs()));
DEBUG(dbgs() << "\n");
return false;
}
}
}
return true;
}
bool IndependentBlocks::areAllBlocksIndependent(const Region *R) const {
for (BasicBlock *BB : R->blocks())
if (!isIndependentBlock(R, BB))
return false;
return true;
}
void IndependentBlocks::getAnalysisUsage(AnalysisUsage &AU) const {
// FIXME: If we call setPreservesCFG(), do the CFG-only passes still need
// to be "addPreserved"?
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<DominanceFrontier>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<PostDominatorTree>();
AU.addRequired<RegionInfoPass>();
AU.addPreserved<RegionInfoPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
AU.addRequired<ScopDetection>();
AU.addPreserved<ScopDetection>();
}
bool IndependentBlocks::runOnFunction(llvm::Function &F) {
bool Changed = false;
RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SD = &getAnalysis<ScopDetection>();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
AllocaBlock = &F.getEntryBlock();
DEBUG(dbgs() << "Run IndepBlock on " << F.getName() << '\n');
for (const Region *R : *SD) {
Changed |= createIndependentBlocks(R);
Changed |= eliminateDeadCode(R);
}
verifyAnalysis();
return Changed;
}
void IndependentBlocks::verifyAnalysis() const {}
void IndependentBlocks::verifyScop(const Region *R) const {
assert(areAllBlocksIndependent(R) && "Cannot generate independent blocks");
}
char IndependentBlocks::ID = 0;
char &polly::IndependentBlocksID = IndependentBlocks::ID;
Pass *polly::createIndependentBlocksPass() { return new IndependentBlocks(); }
INITIALIZE_PASS_BEGIN(IndependentBlocks, "polly-independent",
"Polly - Create independent blocks", false, false);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetection);
INITIALIZE_PASS_END(IndependentBlocks, "polly-independent",
"Polly - Create independent blocks", false, false)

View File

@@ -1,48 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS
; void f(long A[], int N, int *init_ptr) {
; long i, j;
;
; for (i = 0; i < N; ++i) {
; init = *init_ptr;
; for (i = 0; i < N; ++i) {
; A[i] = init + 2;
; }
; }
; }
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
entry:
; SCALARACCESS-NOT: alloca
br label %for.i
for.i:
%indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
%indvar.i.next = add nsw i64 %indvar.i, 1
br label %entry.next
entry.next:
%init = load i64, i64* %init_ptr
; SCALARACCESS-NOT: store
br label %for.j
for.j:
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
%init_plus_two = add i64 %init, 2
; SCALARACCESS: %init_plus_two = add i64 %init, 2
%scevgep = getelementptr i64, i64* %A, i64 %indvar.j
store i64 %init_plus_two, i64* %scevgep
%indvar.j.next = add nsw i64 %indvar.j, 1
%exitcond.j = icmp eq i64 %indvar.j.next, %N
br i1 %exitcond.j, label %for.i.end, label %for.j
for.i.end:
%exitcond.i = icmp eq i64 %indvar.i.next, %N
br i1 %exitcond.i, label %return, label %for.i
return:
ret void
}

View File

@@ -1,56 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS
; void f(long A[], int N, int *init_ptr) {
; long i, j;
;
; for (i = 0; i < N; ++i) {
; init = *init_ptr;
; for (i = 0; i < N; ++i) {
; init2 = *init_ptr;
; A[i] = init + init2;
; }
; }
; }
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
entry:
; SCALARACCESS-NOT: alloca
br label %for.i
for.i:
%indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
%indvar.i.next = add nsw i64 %indvar.i, 1
br label %entry.next
entry.next:
%init = load i64, i64* %init_ptr
; SCALARACCESS-NOT: store
br label %for.j
for.j:
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
%init_2 = load i64, i64* %init_ptr
%init_sum = add i64 %init, %init_2
; The SCEV of %init_sum is (%init + %init_2). It refers to UnknownValues
; both in the same and in a different basic block. We want only the
; reference from the different basic block to be replaced.
; SCALARACCESS: %init_2 = load i64, i64* %init_ptr
; SCALARACCESS: %init_sum = add i64 %init, %init_2
%scevgep = getelementptr i64, i64* %A, i64 %indvar.j
store i64 %init_sum, i64* %scevgep
%indvar.j.next = add nsw i64 %indvar.j, 1
%exitcond.j = icmp eq i64 %indvar.j.next, %N
br i1 %exitcond.j, label %for.i.end, label %for.j
for.i.end:
%exitcond.i = icmp eq i64 %indvar.i.next, %N
br i1 %exitcond.i, label %return, label %for.i
return:
ret void
}

View File

@@ -1,50 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s
; void f(long A[], int N, int *init_ptr) {
; long i, j;
;
; for (i = 0; i < N; ++i) {
; for (i = 0; i < N; ++i) {
; init = *init_ptr;
; A[i] = init + 2;
; }
; }
; }
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
entry:
; CHECK: entry
; CHECK: br label %for.i
br label %for.i
for.i:
%indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
%indvar.i.next = add nsw i64 %indvar.i, 1
br label %entry.next
entry.next:
br label %for.j
for.j:
%indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
%init = load i64, i64* %init_ptr
%init_plus_two = add i64 %init, 2
; The scalar evolution of %init_plus_two is (2 + %init). So we have a
; non-trivial scalar evolution referring to a value in the same basic block.
; We want to ensure that this scalar is not translated into a memory copy.
%scevgep = getelementptr i64, i64* %A, i64 %indvar.j
store i64 %init_plus_two, i64* %scevgep
%indvar.j.next = add nsw i64 %indvar.j, 1
%exitcond.j = icmp eq i64 %indvar.j.next, %N
br i1 %exitcond.j, label %for.i.end, label %for.j
for.i.end:
%exitcond.i = icmp eq i64 %indvar.i.next, %N
br i1 %exitcond.i, label %return, label %for.i
return:
ret void
}

View File

@@ -1,33 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALAR
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define void @phi_nodes_outside() {
entry:
br label %for.i.1
for.i.1:
%i.1 = phi i32 [ %i.1.next, %for.i.1 ], [ 0, %entry ]
%i.1.next = add nsw i32 %i.1, 1
br i1 false, label %for.i.1 , label %for.i.2.preheader
for.i.2.preheader:
br label %for.i.2
for.i.2:
; The value of %i.1.next is used outside of the scop in a PHI node.
%i.2 = phi i32 [ %i.2.next , %for.i.2 ], [ %i.1.next, %for.i.2.preheader ]
%i.2.next = add nsw i32 %i.2, 1
fence seq_cst
br i1 false, label %for.i.2, label %cleanup
cleanup:
ret void
}
; SCALAR-NOT: alloca
; SCALAR: for.i.2.preheader:
; SCALAR-NOT: load
; SCALAR: for.i.2:
; SCALAR: %i.2 = phi i32 [ %i.2.next, %for.i.2 ], [ %i.1.next, %for.i.2.preheader ]

View File

@@ -1,222 +0,0 @@
; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS
; RUN: opt %loadPolly -basicaa -polly-independent < %s -S | FileCheck %s -check-prefix=SCALARACCESS
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@A = common global [1024 x float] zeroinitializer, align 8
define i32 @empty() nounwind {
entry:
fence seq_cst
br label %for.cond
for.cond:
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %for.body, label %return
for.body:
br label %for.inc
for.inc:
%indvar.next = add i64 %indvar, 1
br label %for.cond
return:
fence seq_cst
ret i32 0
}
; SCALARACCESS-LABEL: @array_access()
define i32 @array_access() nounwind {
entry:
fence seq_cst
br label %for.cond
; SCALARACCESS: entry:
; SCALARACCESS-NOT: alloca
for.cond:
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %for.body, label %return
for.body:
%arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
%float = uitofp i64 %indvar to float
store float %float, float* %arrayidx
br label %for.inc
; SCALARACCESS: for.body:
; SCALARACCESS: %float = uitofp i64 %indvar to float
; SCALARACCESS: store float %float, float* %arrayidx
for.inc:
%indvar.next = add i64 %indvar, 1
br label %for.cond
return:
fence seq_cst
ret i32 0
}
; SCALARACCESS-LABEL: @intra_scop_dep()
define i32 @intra_scop_dep() nounwind {
entry:
fence seq_cst
br label %for.cond
; SCALARACCESS: entry:
; SCALARACCESS-NOT: alloca
; SCALARACCESS: fence
for.cond:
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %for.body.a, label %return
for.body.a:
%arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
%scalar = load float, float* %arrayidx
br label %for.body.b
; SCALARACCESS: for.body.a:
; SCALARACCESS: %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
; SCALARACCESS: %scalar = load float, float* %arrayidx
; SCALARACCESS-NOT: store
; SCALARACCESS: br label %for.body.b
for.body.b:
%arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
%float = uitofp i64 %indvar to float
%sum = fadd float %scalar, %float
store float %sum, float* %arrayidx2
br label %for.inc
; SCALARACCESS: for.body.b:
; SCALARACCESS: %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
; SCALARACCESS: %float = uitofp i64 %indvar to float
; SCALARACCESS-NOT: load
; SCALARACCESS: %sum = fadd float %scalar, %float
; SCALARACCESS: store float %sum, float* %arrayidx2
; SCALARACCESS: br label %for.inc
for.inc:
%indvar.next = add i64 %indvar, 1
br label %for.cond
return:
fence seq_cst
ret i32 0
}
; It is not possible to have a scop which accesses a scalar element that is
; a global variable. All global variables are pointers, possibly containing
; a single element.
; SCALARACCESS-LABEL: @use_after_scop()
define i32 @use_after_scop() nounwind {
entry:
fence seq_cst
br label %for.head
; SCALARACCESS: entry:
; SCALARACCESS-NOT: alloca
; SCALARACCESS: fence
for.head:
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
br label %for.body
for.body:
%arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
%scalar = load float, float* %arrayidx
br label %for.inc
; SCALARACCESS: for.body:
; SCALARACCESS: %scalar = load float, float* %arrayidx
; SCALARACCESS-NOT: store float %scalar
for.inc:
%indvar.next = add i64 %indvar, 1
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %for.head, label %for.after
for.after:
fence seq_cst
%return_value = fptosi float %scalar to i32
br label %return
; SCALARACCESS: for.after:
; SCALARACCESS: fence seq_cst
; SCALARACCESS: %return_value = fptosi float %scalar to i32
return:
ret i32 %return_value
}
; We currently do not transform scalar references that have only read accesses
; in the scop. There are two reasons for this:
;
; o We don't introduce additional memory references, which may add
; compile-time overhead.
; o For integer values, such a translation may block the use of scalar
; evolution on those values.
;
; SCALARACCESS-LABEL: @before_scop()
define i32 @before_scop() nounwind {
entry:
br label %preheader
preheader:
%scalar = fadd float 4.0, 5.0
fence seq_cst
br label %for.cond
for.cond:
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ]
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %for.body, label %return
for.body:
%arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
store float %scalar, float* %arrayidx
br label %for.inc
; SCALARACCESS: for.body:
; SCALARACCESS: store float %scalar, float* %arrayidx
for.inc:
%indvar.next = add i64 %indvar, 1
br label %for.cond
return:
fence seq_cst
ret i32 0
}
; Currently not working
; SCALARACCESS-LABEL: @param_before_scop(
define i32 @param_before_scop(float %scalar) nounwind {
entry:
fence seq_cst
br label %for.cond
for.cond:
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvar, 1024
br i1 %exitcond, label %for.body, label %return
for.body:
%arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
store float %scalar, float* %arrayidx
br label %for.inc
for.inc:
%indvar.next = add i64 %indvar, 1
br label %for.cond
return:
fence seq_cst
ret i32 0
}

View File

@@ -6,9 +6,13 @@
; A[n % 42] += 1;
; }
;
-; CHECK: polly.stmt.bb3:
-; CHECK: %p_tmp.moved.to.bb3 = srem i64 %n, 42
-; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %p_tmp.moved.to.bb3
+; CHECK: polly.stmt.bb2:
+; CHECK-NEXT: %p_tmp = srem i64 %n, 42
+; CHECK-NEXT: store i64 %p_tmp, i64* %tmp.s2a
+;
+; CHECK: polly.stmt.bb3:
+; CHECK: %tmp.s2a.reload = load i64, i64* %tmp.s2a
+; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %tmp.s2a.reload
define void @pos(float* %A, i64 %n) {
bb:

View File

@@ -1,5 +1,5 @@
; This should be run without alias analysis enabled.
-;RUN: opt %loadPolly -polly-independent < %s
+;RUN: opt %loadPolly -polly-scops < %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
define i32 @main() nounwind {

View File

@@ -1,199 +0,0 @@
; RUN: opt %loadPolly -polly-independent < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define i32 @main() nounwind uwtable readnone {
%arr = alloca [100 x i32], align 16
br label %1
; <label>:1 ; preds = %1, %0
%indvars.iv3 = phi i64 [ 0, %0 ], [ %indvars.iv.next4, %1 ]
%2 = getelementptr inbounds [100 x i32], [100 x i32]* %arr, i64 0, i64 %indvars.iv3
%3 = trunc i64 %indvars.iv3 to i32
store i32 %3, i32* %2, align 4, !tbaa !0
%indvars.iv.next4 = add i64 %indvars.iv3, 1
%lftr.wideiv5 = trunc i64 %indvars.iv.next4 to i32
%exitcond6 = icmp eq i32 %lftr.wideiv5, 100
br i1 %exitcond6, label %.preheader, label %1
.preheader: ; preds = %.preheader, %1
%indvars.iv = phi i64 [ %indvars.iv.next, %.preheader ], [ 0, %1 ]
%4 = getelementptr inbounds [100 x i32], [100 x i32]* %arr, i64 0, i64 %indvars.iv
%5 = load i32, i32* %4, align 4, !tbaa !0
%6 = xor i32 %5, -1
%7 = shl i32 %5, 15
%8 = add nsw i32 %7, %6
%9 = ashr i32 %8, 12
%10 = xor i32 %9, %8
%11 = mul i32 %10, 9
%12 = ashr i32 %11, 4
%13 = xor i32 %12, %11
%14 = mul nsw i32 %13, 20571
%15 = ashr i32 %14, 16
%16 = xor i32 %15, %14
%17 = xor i32 %16, -1
%18 = shl i32 %16, 15
%19 = add nsw i32 %18, %17
%20 = ashr i32 %19, 12
%21 = xor i32 %20, %19
%22 = mul i32 %21, 5
%23 = ashr i32 %22, 4
%24 = xor i32 %23, %22
%25 = mul nsw i32 %24, 20576
%26 = ashr i32 %25, 16
%27 = xor i32 %26, %25
%28 = xor i32 %27, -1
%29 = shl i32 %27, 15
%30 = add nsw i32 %29, %28
%31 = ashr i32 %30, 12
%32 = xor i32 %31, %30
%33 = mul i32 %32, 5
%34 = ashr i32 %33, 4
%35 = xor i32 %34, %33
%36 = mul nsw i32 %35, 2057
%37 = ashr i32 %36, 16
%38 = xor i32 %37, %36
%39 = xor i32 %38, -1
%40 = shl i32 %38, 15
%41 = add nsw i32 %40, %39
%42 = ashr i32 %41, 12
%43 = xor i32 %42, %41
%44 = mul i32 %43, 5
%45 = ashr i32 %44, 4
%46 = xor i32 %45, %44
%47 = mul nsw i32 %46, 20572
%48 = ashr i32 %47, 16
%49 = xor i32 %48, %47
%50 = xor i32 %49, -1
%51 = shl i32 %49, 15
%52 = add nsw i32 %51, %50
%53 = ashr i32 %52, 12
%54 = xor i32 %53, %52
%55 = mul i32 %54, 5
%56 = ashr i32 %55, 4
%57 = xor i32 %56, %55
%58 = mul nsw i32 %57, 2051
%59 = ashr i32 %58, 16
%60 = xor i32 %59, %58
%61 = xor i32 %60, -1
%62 = shl i32 %60, 15
%63 = add nsw i32 %62, %61
%64 = ashr i32 %63, 12
%65 = xor i32 %64, %63
%66 = mul i32 %65, 5
%67 = ashr i32 %66, 4
%68 = xor i32 %67, %66
%69 = mul nsw i32 %68, 2057
%70 = ashr i32 %69, 16
%71 = xor i32 %70, %69
%72 = xor i32 %71, -1
%73 = shl i32 %71, 15
%74 = add nsw i32 %73, %72
%75 = ashr i32 %74, 12
%76 = xor i32 %75, %74
%77 = mul i32 %76, 5
%78 = ashr i32 %77, 4
%79 = xor i32 %78, %77
%80 = mul nsw i32 %79, 205
%81 = ashr i32 %80, 17
%82 = xor i32 %81, %80
%83 = xor i32 %82, -1
%84 = shl i32 %82, 15
%85 = add nsw i32 %84, %83
%86 = ashr i32 %85, 12
%87 = xor i32 %86, %85
%88 = mul i32 %87, 5
%89 = ashr i32 %88, 4
%90 = xor i32 %89, %88
%91 = mul nsw i32 %90, 2057
%92 = ashr i32 %91, 16
%93 = xor i32 %92, %91
%94 = xor i32 %93, -1
%95 = shl i32 %93, 15
%96 = add nsw i32 %95, %94
%97 = ashr i32 %96, 12
%98 = xor i32 %97, %96
%99 = mul i32 %98, 5
%100 = ashr i32 %99, 3
%101 = xor i32 %100, %99
%102 = mul nsw i32 %101, 20571
%103 = ashr i32 %102, 16
%104 = xor i32 %103, %102
%105 = xor i32 %104, -1
%106 = shl i32 %104, 15
%107 = add nsw i32 %106, %105
%108 = ashr i32 %107, 12
%109 = xor i32 %108, %107
%110 = mul i32 %109, 5
%111 = ashr i32 %110, 4
%112 = xor i32 %111, %110
%113 = mul nsw i32 %112, 2057
%114 = ashr i32 %113, 16
%115 = xor i32 %114, %113
%116 = xor i32 %115, -1
%117 = shl i32 %115, 15
%118 = add nsw i32 %117, %116
%119 = ashr i32 %118, 12
%120 = xor i32 %119, %118
%121 = mul i32 %120, 5
%122 = ashr i32 %121, 4
%123 = xor i32 %122, %121
%124 = mul nsw i32 %123, 20572
%125 = ashr i32 %124, 16
%126 = xor i32 %125, %124
%127 = xor i32 %126, -1
%128 = shl i32 %126, 15
%129 = add nsw i32 %128, %127
%130 = ashr i32 %129, 12
%131 = xor i32 %130, %129
%132 = mul i32 %131, 5
%133 = ashr i32 %132, 4
%134 = xor i32 %133, %132
%135 = mul nsw i32 %134, 2057
%136 = ashr i32 %135, 16
%137 = xor i32 %136, %135
%138 = xor i32 %137, -1
%139 = shl i32 %137, 15
%140 = add nsw i32 %139, %138
%141 = ashr i32 %140, 12
%142 = xor i32 %141, %140
%143 = mul i32 %142, 5
%144 = ashr i32 %143, 4
%145 = xor i32 %144, %143
%146 = mul nsw i32 %145, 2057
%147 = ashr i32 %146, 16
%148 = xor i32 %147, %146
%149 = xor i32 %148, -1
%150 = shl i32 %148, 15
%151 = add nsw i32 %150, %149
%152 = ashr i32 %151, 12
%153 = xor i32 %152, %151
%154 = mul i32 %153, 5
%155 = ashr i32 %154, 4
%156 = xor i32 %155, %154
%157 = mul nsw i32 %156, 2057
%158 = ashr i32 %157, 16
%159 = xor i32 %158, %157
%160 = xor i32 %159, -1
%161 = shl i32 %159, 15
%162 = add nsw i32 %161, %160
%163 = ashr i32 %162, 12
%164 = xor i32 %163, %162
%165 = mul i32 %164, 5
%166 = ashr i32 %165, 4
%167 = xor i32 %166, %165
%168 = mul nsw i32 %167, 2057
%169 = ashr i32 %168, 16
%170 = xor i32 %169, %168
store i32 %170, i32* %4, align 4, !tbaa !0
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 100
br i1 %exitcond, label %171, label %.preheader
; <label>:171 ; preds = %.preheader
ret i32 0
}
!0 = !{!"int", !1}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA", null}

View File

@@ -6,11 +6,13 @@
; A[i]++;
; }
;
+; FIXME: This test is a negative test until we have an independent blocks alternative.
+;
; We should move operands as close to their use as possible, hence in this case
; there should not be any scalar dependence anymore after %cmp1 is moved to
; %for.body (%c and %indvar.iv are synthesizable).
;
-; CHECK-NOT: [Scalar: 1]
+; CHECK: [Scalar: 1]
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@@ -1,4 +1,7 @@
-; RUN: opt %loadPolly -polly-independent < %s
+; RUN: opt %loadPolly -polly-scops < %s
+;
+; CHECK: Region: %if.then6---%return
+;
target datalayout ="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define void @arc_either() {