Remove independent blocks pass
Polly can now be used as an analysis-only tool, as long as code generation is disabled. However, we do not yet have an alternative to the independent blocks pass in place, though in the relevant cases this does not seem to impact performance much. Nevertheless, a virtual alternative that allows the same transformations without changing the input region will follow shortly.

llvm-svn: 250652
parent c2c154e078
commit 01978cfa0c
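With the pass removed, Polly's analyses can still be exercised on their own as long as code generation stays off. A minimal invocation sketch (hypothetical command line: it assumes an LLVMPolly plugin built from this revision and reuses the flags that appear in the RUN lines of the tests below):

    opt -load LLVMPolly.so -basicaa -polly-detect -polly-scops -analyze input.ll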
include/polly/LinkAllPasses.h
@@ -33,7 +33,6 @@ llvm::Pass *createDOTOnlyPrinterPass();
 llvm::Pass *createDOTOnlyViewerPass();
 llvm::Pass *createDOTPrinterPass();
 llvm::Pass *createDOTViewerPass();
-llvm::Pass *createIndependentBlocksPass();
 llvm::Pass *createJSONExporterPass();
 llvm::Pass *createJSONImporterPass();
 llvm::Pass *createPollyCanonicalizePass();
@@ -43,7 +42,6 @@ llvm::Pass *createIslAstInfoPass();
 llvm::Pass *createCodeGenerationPass();
 llvm::Pass *createIslScheduleOptimizerPass();
 
-extern char &IndependentBlocksID;
 extern char &CodePreparationID;
 }
 
@@ -64,7 +62,6 @@ struct PollyForcePassLinking {
     polly::createDOTOnlyViewerPass();
     polly::createDOTPrinterPass();
     polly::createDOTViewerPass();
-    polly::createIndependentBlocksPass();
     polly::createJSONExporterPass();
     polly::createJSONImporterPass();
     polly::createScopDetectionPass();
@@ -81,7 +78,6 @@ namespace llvm {
 class PassRegistry;
 void initializeCodePreparationPass(llvm::PassRegistry &);
 void initializeDeadCodeElimPass(llvm::PassRegistry &);
-void initializeIndependentBlocksPass(llvm::PassRegistry &);
 void initializeJSONExporterPass(llvm::PassRegistry &);
 void initializeJSONImporterPass(llvm::PassRegistry &);
 void initializeIslAstInfoPass(llvm::PassRegistry &);
lib/Analysis/ScopDetection.cpp
@@ -648,10 +648,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
   // Check that the base address of the access is invariant in the current
   // region.
   if (!isInvariant(*BaseValue, CurRegion))
-    // Verification of this property is difficult as the independent blocks
-    // pass may introduce aliasing that we did not have when running the
-    // scop detection.
-    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/false, BaseValue,
+    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BaseValue,
                                          &Inst);
 
   AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer);
@@ -684,8 +681,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
                                        AccessFunction, &Inst, BaseValue);
   }
 
-  // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions
-  // created by IndependentBlocks Pass.
+  // FIXME: Think about allowing IntToPtrInst
   if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BaseValue))
     return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
 
@@ -699,13 +695,6 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
   AliasSet &AS = Context.AST.getAliasSetForPointer(
       BaseValue, MemoryLocation::UnknownSize, AATags);
 
-  // INVALID triggers an assertion in verifying mode, if it detects that a
-  // SCoP was detected by SCoP detection and that this SCoP was invalidated by
-  // a pass that stated it would preserve the SCoPs. We disable this check as
-  // the independent blocks pass may create memory references which seem to
-  // alias, if -basicaa is not available. They actually do not, but as we can
-  // not proof this without -basicaa we would fail. We disable this check to
-  // not cause irrelevant verification failures.
   if (!AS.isMustAlias()) {
     if (PollyUseRuntimeAliasChecks) {
       bool CanBuildRunTimeCheck = true;
@@ -731,7 +720,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst,
       if (CanBuildRunTimeCheck)
         return true;
     }
-    return invalid<ReportAlias>(Context, /*Assert=*/false, &Inst, AS);
+    return invalid<ReportAlias>(Context, /*Assert=*/true, &Inst, AS);
   }
 
   return true;
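The two /*Assert=*/false call sites above existed only because the independent blocks pass could introduce apparent aliasing after detection had already accepted a SCoP; with the pass gone they can be strict again. A self-contained sketch of what such an Assert flag typically means (illustrative names only, not Polly's actual helper or its signature):

    #include <cassert>
    #include <cstdio>

    // Stand-in for Polly's invalid<ReportKind>(...) rejection helper.
    // VerifyMode models re-running detection to confirm that a previously
    // detected SCoP is still valid.
    static bool VerifyMode = false;

    static bool invalid(const char *Reason, bool Assert) {
      std::fprintf(stderr, "SCoP rejected: %s\n", Reason);
      // Assert=true: rejecting a SCoP during re-verification is a hard error.
      // Assert=false: tolerated, e.g. when an earlier pass may have changed
      // aliasing (the situation the independent blocks pass used to create).
      assert(!(Assert && VerifyMode) && "SCoP invalidated by a preserving pass");
      return false; // detection always reports failure to its caller
    }

    int main() {
      invalid("variant base pointer", /*Assert=*/true); // VerifyMode off: no abort
      return 0;
    }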
lib/Analysis/ScopInfo.cpp
@@ -3651,7 +3651,6 @@ ScopInfo::~ScopInfo() {
 }
 
 void ScopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredID(IndependentBlocksID);
   AU.addRequired<LoopInfoWrapperPass>();
   AU.addRequired<RegionInfoPass>();
   AU.addRequired<DominatorTreeWrapperPass>();
lib/CMakeLists.txt
@@ -49,7 +49,6 @@ add_polly_library(Polly
   Transform/Canonicalization.cpp
   Transform/CodePreparation.cpp
   Transform/DeadCodeElimination.cpp
-  Transform/IndependentBlocks.cpp
   Transform/ScheduleOptimizer.cpp
   ${POLLY_HEADER_FILES}
   )
lib/CodeGen/CodeGeneration.cpp
@@ -192,7 +192,6 @@ public:
     // region tree.
     AU.addPreserved<RegionInfoPass>();
     AU.addPreserved<ScopInfo>();
-    AU.addPreservedID(IndependentBlocksID);
   }
 };
 }
lib/Makefile
@@ -135,7 +135,6 @@ SOURCES= Polly.cpp \
 	Transform/Canonicalization.cpp \
 	Transform/CodePreparation.cpp \
 	Transform/DeadCodeElimination.cpp \
-	Transform/IndependentBlocks.cpp \
 	Transform/ScheduleOptimizer.cpp \
 	${GPGPU_FILES} \
 	${ISL_CODEGEN_FILES} \
lib/Support/RegisterPasses.cpp
@@ -146,7 +146,6 @@ void initializePollyPasses(PassRegistry &Registry) {
   initializeCodePreparationPass(Registry);
   initializeDeadCodeElimPass(Registry);
   initializeDependenceInfoPass(Registry);
-  initializeIndependentBlocksPass(Registry);
   initializeJSONExporterPass(Registry);
   initializeJSONImporterPass(Registry);
   initializeIslAstInfoPass(Registry);
lib/Transform/IndependentBlocks.cpp
@@ -1,373 +0,0 @@
-//===------ IndependentBlocks.cpp - Create Independent Blocks in Regions --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Create independent blocks in the regions detected by ScopDetection.
-//
-//===----------------------------------------------------------------------===//
-//
-#include "polly/LinkAllPasses.h"
-#include "polly/Options.h"
-#include "polly/ScopDetection.h"
-#include "polly/Support/ScopHelper.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/DominanceFrontier.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <vector>
-
-using namespace polly;
-using namespace llvm;
-
-#define DEBUG_TYPE "polly-independent"
-
-namespace {
-struct IndependentBlocks : public FunctionPass {
-  RegionInfo *RI;
-  ScalarEvolution *SE;
-  ScopDetection *SD;
-  LoopInfo *LI;
-
-  BasicBlock *AllocaBlock;
-
-  static char ID;
-
-  IndependentBlocks() : FunctionPass(ID) {}
-
-  // Create new code for every instruction operator that can be expressed by a
-  // SCEV. Like this there are just two types of instructions left:
-  //
-  // 1. Instructions that only reference loop ivs or parameters outside the
-  //    region.
-  //
-  // 2. Instructions that are not used for any memory modification. (These
-  //    will be ignored later on.)
-  //
-  // Blocks containing only these kind of instructions are called independent
-  // blocks as they can be scheduled arbitrarily.
-  bool createIndependentBlocks(BasicBlock *BB, const Region *R);
-  bool createIndependentBlocks(const Region *R);
-
-  // Elimination on the Scop to eliminate the scalar dependences come with
-  // trivially dead instructions.
-  bool eliminateDeadCode(const Region *R);
-
-  //===--------------------------------------------------------------------===//
-  /// Non trivial scalar dependences checking functions.
-  /// Non trivial scalar dependences occur when the def and use are located in
-  /// different BBs and we can not move them into the same one. This will
-  /// prevent use from schedule BBs arbitrarily.
-  ///
-  /// @brief This function checks if a scalar value that is part of the
-  ///        Scop is used outside of the Scop.
-  ///
-  /// @param Use The use of the instruction.
-  /// @param R   The maximum region in the Scop.
-  ///
-  /// @return Return true if the Use of an instruction and the instruction
-  ///         itself form a non trivial scalar dependence.
-  static bool isEscapeUse(const Value *Use, const Region *R);
-
-  //===--------------------------------------------------------------------===//
-  /// Operand tree moving functions.
-  /// Trivial scalar dependences can eliminate by move the def to the same BB
-  /// that containing use.
-  ///
-  /// @brief Check if the instruction can be moved to another place safely.
-  ///
-  /// @param Inst The instruction.
-  ///
-  /// @return Return true if the instruction can be moved safely, false
-  ///         otherwise.
-  static bool isSafeToMove(Instruction *Inst);
-
-  typedef std::map<Instruction *, Instruction *> ReplacedMapType;
-
-  /// @brief Move all safe to move instructions in the Operand Tree (DAG) to
-  ///        eliminate trivial scalar dependences.
-  ///
-  /// @param Inst        The root of the operand Tree.
-  /// @param R           The maximum region in the Scop.
-  /// @param ReplacedMap The map that mapping original instruction to the moved
-  ///                    instruction.
-  /// @param InsertPos   The insert position of the moved instructions.
-  void moveOperandTree(Instruction *Inst, const Region *R,
-                       ReplacedMapType &ReplacedMap, Instruction *InsertPos);
-
-  bool isIndependentBlock(const Region *R, BasicBlock *BB) const;
-  bool areAllBlocksIndependent(const Region *R) const;
-
-  bool runOnFunction(Function &F);
-  void verifyAnalysis() const;
-  void verifyScop(const Region *R) const;
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-};
-}
-
-bool IndependentBlocks::isSafeToMove(Instruction *Inst) {
-  if (Inst->mayReadFromMemory() || Inst->mayWriteToMemory())
-    return false;
-
-  return isSafeToSpeculativelyExecute(Inst);
-}
-
-void IndependentBlocks::moveOperandTree(Instruction *Inst, const Region *R,
-                                        ReplacedMapType &ReplacedMap,
-                                        Instruction *InsertPos) {
-  BasicBlock *CurBB = Inst->getParent();
-
-  // Depth first traverse the operand tree (or operand dag, because we will
-  // stop at PHINodes, so there are no cycle).
-  typedef Instruction::op_iterator ChildIt;
-  std::vector<std::pair<Instruction *, ChildIt>> WorkStack;
-
-  WorkStack.push_back(std::make_pair(Inst, Inst->op_begin()));
-  DenseSet<Instruction *> VisitedSet;
-
-  while (!WorkStack.empty()) {
-    Instruction *CurInst = WorkStack.back().first;
-    ChildIt It = WorkStack.back().second;
-    DEBUG(dbgs() << "Checking Operand of Node:\n" << *CurInst << "\n------>\n");
-    if (It == CurInst->op_end()) {
-      // Insert the new instructions in topological order.
-      if (!CurInst->getParent()) {
-        CurInst->insertBefore(InsertPos);
-        SE->forgetValue(CurInst);
-      }
-
-      WorkStack.pop_back();
-    } else {
-      // for each node N,
-      Instruction *Operand = dyn_cast<Instruction>(*It);
-      ++WorkStack.back().second;
-
-      // Can not move no instruction value.
-      if (Operand == 0)
-        continue;
-
-      DEBUG(dbgs() << "For Operand:\n" << *Operand << "\n--->");
-
-      // If the Scop Region does not contain N, skip it and all its operands and
-      // continue: because we reach a "parameter".
-      // FIXME: we must keep the predicate instruction inside the Scop,
-      // otherwise it will be translated to a load instruction, and we can not
-      // handle load as affine predicate at this moment.
-      if (!R->contains(Operand) && !isa<TerminatorInst>(CurInst)) {
-        DEBUG(dbgs() << "Out of region.\n");
-        continue;
-      }
-
-      if (canSynthesize(Operand, LI, SE, R)) {
-        DEBUG(dbgs() << "is IV.\n");
-        continue;
-      }
-
-      // We can not move the operand, a non trivial scalar dependence found!
-      if (!isSafeToMove(Operand)) {
-        DEBUG(dbgs() << "Can not move!\n");
-        continue;
-      }
-
-      // Do not need to move instruction if it is contained in the same BB with
-      // the root instruction.
-      if (Operand->getParent() == CurBB) {
-        DEBUG(dbgs() << "No need to move.\n");
-        // Try to move its operand, but do not visit an instuction twice.
-        if (VisitedSet.insert(Operand).second)
-          WorkStack.push_back(std::make_pair(Operand, Operand->op_begin()));
-        continue;
-      }
-
-      // Now we need to move Operand to CurBB.
-      // Check if we already moved it.
-      ReplacedMapType::iterator At = ReplacedMap.find(Operand);
-      if (At != ReplacedMap.end()) {
-        DEBUG(dbgs() << "Moved.\n");
-        Instruction *MovedOp = At->second;
-        It->set(MovedOp);
-        SE->forgetValue(MovedOp);
-      } else {
-        // Note that NewOp is not inserted in any BB now, we will insert it when
-        // it popped form the work stack, so it will be inserted in topological
-        // order.
-        Instruction *NewOp = Operand->clone();
-        NewOp->setName(Operand->getName() + ".moved.to." + CurBB->getName());
-        DEBUG(dbgs() << "Move to " << *NewOp << "\n");
-        It->set(NewOp);
-        ReplacedMap.insert(std::make_pair(Operand, NewOp));
-        SE->forgetValue(Operand);
-
-        // Process its operands, but do not visit an instuction twice.
-        if (VisitedSet.insert(NewOp).second)
-          WorkStack.push_back(std::make_pair(NewOp, NewOp->op_begin()));
-      }
-    }
-  }
-
-  SE->forgetValue(Inst);
-}
-
-bool IndependentBlocks::createIndependentBlocks(BasicBlock *BB,
-                                                const Region *R) {
-  std::vector<Instruction *> WorkList;
-  for (Instruction &Inst : *BB)
-    if (!isSafeToMove(&Inst) && !canSynthesize(&Inst, LI, SE, R))
-      WorkList.push_back(&Inst);
-
-  ReplacedMapType ReplacedMap;
-  Instruction *InsertPos = BB->getFirstNonPHIOrDbg();
-
-  for (Instruction *Inst : WorkList)
-    if (!isa<PHINode>(Inst))
-      moveOperandTree(Inst, R, ReplacedMap, InsertPos);
-
-  // The BB was changed if we replaced any operand.
-  return !ReplacedMap.empty();
-}
-
-bool IndependentBlocks::createIndependentBlocks(const Region *R) {
-  bool Changed = false;
-
-  for (BasicBlock *BB : R->blocks())
-    Changed |= createIndependentBlocks(BB, R);
-
-  return Changed;
-}
-
-bool IndependentBlocks::eliminateDeadCode(const Region *R) {
-  std::vector<Instruction *> WorkList;
-
-  // Find all trivially dead instructions.
-  for (BasicBlock *BB : R->blocks())
-    for (Instruction &Inst : *BB)
-      if (!isIgnoredIntrinsic(&Inst) && isInstructionTriviallyDead(&Inst))
-        WorkList.push_back(&Inst);
-
-  if (WorkList.empty())
-    return false;
-
-  // Delete them so the cross BB scalar dependences come with them will
-  // also be eliminated.
-  while (!WorkList.empty()) {
-    RecursivelyDeleteTriviallyDeadInstructions(WorkList.back());
-    WorkList.pop_back();
-  }
-
-  return true;
-}
-
-bool IndependentBlocks::isEscapeUse(const Value *Use, const Region *R) {
-  // Non-instruction user will never escape.
-  if (!isa<Instruction>(Use))
-    return false;
-
-  return !R->contains(cast<Instruction>(Use));
-}
-
-bool IndependentBlocks::isIndependentBlock(const Region *R,
-                                           BasicBlock *BB) const {
-  for (Instruction &Inst : *BB) {
-    if (canSynthesize(&Inst, LI, SE, R))
-      continue;
-    if (isIgnoredIntrinsic(&Inst))
-      continue;
-
-    // A value inside the Scop is referenced outside.
-    for (User *U : Inst.users()) {
-      if (isEscapeUse(U, R)) {
-        DEBUG(dbgs() << "Instruction not independent:\n");
-        DEBUG(dbgs() << "Instruction used outside the Scop!\n");
-        DEBUG(Inst.print(dbgs()));
-        DEBUG(dbgs() << "\n");
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-bool IndependentBlocks::areAllBlocksIndependent(const Region *R) const {
-  for (BasicBlock *BB : R->blocks())
-    if (!isIndependentBlock(R, BB))
-      return false;
-
-  return true;
-}
-
-void IndependentBlocks::getAnalysisUsage(AnalysisUsage &AU) const {
-  // FIXME: If we set preserves cfg, the cfg only passes do not need to
-  //        be "addPreserved"?
-  AU.addPreserved<AAResultsWrapperPass>();
-  AU.addPreserved<BasicAAWrapperPass>();
-  AU.addPreserved<DominatorTreeWrapperPass>();
-  AU.addPreserved<DominanceFrontier>();
-  AU.addPreserved<GlobalsAAWrapperPass>();
-  AU.addPreserved<PostDominatorTree>();
-  AU.addRequired<RegionInfoPass>();
-  AU.addPreserved<RegionInfoPass>();
-  AU.addRequired<LoopInfoWrapperPass>();
-  AU.addPreserved<LoopInfoWrapperPass>();
-  AU.addRequired<ScalarEvolutionWrapperPass>();
-  AU.addPreserved<ScalarEvolutionWrapperPass>();
-  AU.addPreserved<SCEVAAWrapperPass>();
-  AU.addRequired<ScopDetection>();
-  AU.addPreserved<ScopDetection>();
-}
-
-bool IndependentBlocks::runOnFunction(llvm::Function &F) {
-
-  bool Changed = false;
-
-  RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  SD = &getAnalysis<ScopDetection>();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
-  AllocaBlock = &F.getEntryBlock();
-
-  DEBUG(dbgs() << "Run IndepBlock on " << F.getName() << '\n');
-
-  for (const Region *R : *SD) {
-    Changed |= createIndependentBlocks(R);
-    Changed |= eliminateDeadCode(R);
-  }
-
-  verifyAnalysis();
-
-  return Changed;
-}
-
-void IndependentBlocks::verifyAnalysis() const {}
-
-void IndependentBlocks::verifyScop(const Region *R) const {
-  assert(areAllBlocksIndependent(R) && "Cannot generate independent blocks");
-}
-
-char IndependentBlocks::ID = 0;
-char &polly::IndependentBlocksID = IndependentBlocks::ID;
-
-Pass *polly::createIndependentBlocksPass() { return new IndependentBlocks(); }
-
-INITIALIZE_PASS_BEGIN(IndependentBlocks, "polly-independent",
-                      "Polly - Create independent blocks", false, false);
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
-INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
-INITIALIZE_PASS_DEPENDENCY(ScopDetection);
-INITIALIZE_PASS_END(IndependentBlocks, "polly-independent",
-                    "Polly - Create independent blocks", false, false)
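What the deleted pass used to do to cross-block operands is still visible in the test updates further below: a value defined in another block was cloned into the using block under a ".moved.to.<bb>" name, whereas after this commit scalars cross block boundaries through ordinary scalar accesses (the ".s2a" store/reload pattern in the updated CHECK lines). A hypothetical before/after fragment in LLVM IR, modeled on the srem test below:

    ; Input: %tmp is defined in bb2 and used in bb3.
    bb2:
      %tmp = srem i64 %n, 42
      br label %bb3
    bb3:
      %gep = getelementptr inbounds float, float* %A, i64 %tmp
      br label %exit

    ; After -polly-independent, bb3 recomputed the value locally, so the
    ; block no longer depended on a scalar defined elsewhere:
    bb3:
      %tmp.moved.to.bb3 = srem i64 %n, 42
      %gep = getelementptr inbounds float, float* %A, i64 %tmp.moved.to.bb3
      br label %exit

The deleted regression tests for this behavior follow.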
@@ -1,48 +0,0 @@
-; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS
-
-; void f(long A[], int N, int *init_ptr) {
-;   long i, j;
-;
-;   for (i = 0; i < N; ++i) {
-;     init = *init_ptr;
-;     for (i = 0; i < N; ++i) {
-;       A[i] = init + 2;
-;     }
-;   }
-; }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-
-define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
-entry:
-
-; SCALARACCESS-NOT: alloca
-  br label %for.i
-
-for.i:
-  %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
-  %indvar.i.next = add nsw i64 %indvar.i, 1
-  br label %entry.next
-
-entry.next:
-  %init = load i64, i64* %init_ptr
-; SCALARACCESS-NOT: store
-  br label %for.j
-
-for.j:
-  %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
-  %init_plus_two = add i64 %init, 2
-; SCALARACCESS: %init_plus_two = add i64 %init, 2
-  %scevgep = getelementptr i64, i64* %A, i64 %indvar.j
-  store i64 %init_plus_two, i64* %scevgep
-  %indvar.j.next = add nsw i64 %indvar.j, 1
-  %exitcond.j = icmp eq i64 %indvar.j.next, %N
-  br i1 %exitcond.j, label %for.i.end, label %for.j
-
-for.i.end:
-  %exitcond.i = icmp eq i64 %indvar.i.next, %N
-  br i1 %exitcond.i, label %return, label %for.i
-
-return:
-  ret void
-}
@@ -1,56 +0,0 @@
-; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS
-
-; void f(long A[], int N, int *init_ptr) {
-;   long i, j;
-;
-;   for (i = 0; i < N; ++i) {
-;     init = *init_ptr;
-;     for (i = 0; i < N; ++i) {
-;       init2 = *init_ptr;
-;       A[i] = init + init2;
-;     }
-;   }
-; }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-
-define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
-entry:
-
-; SCALARACCESS-NOT: alloca
-  br label %for.i
-
-for.i:
-  %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
-  %indvar.i.next = add nsw i64 %indvar.i, 1
-  br label %entry.next
-
-entry.next:
-  %init = load i64, i64* %init_ptr
-; SCALARACCESS-NOT: store
-  br label %for.j
-
-for.j:
-  %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
-  %init_2 = load i64, i64* %init_ptr
-  %init_sum = add i64 %init, %init_2
-
-; The SCEV of %init_sum is (%init + %init_2). It is referring to both an
-; UnknownValue in the same and in a different basic block. We want only the
-; reference to the different basic block to be replaced.
-
-; SCALARACCESS: %init_2 = load i64, i64* %init_ptr
-; SCALARACCESS: %init_sum = add i64 %init, %init_2
-  %scevgep = getelementptr i64, i64* %A, i64 %indvar.j
-  store i64 %init_sum, i64* %scevgep
-  %indvar.j.next = add nsw i64 %indvar.j, 1
-  %exitcond.j = icmp eq i64 %indvar.j.next, %N
-  br i1 %exitcond.j, label %for.i.end, label %for.j
-
-for.i.end:
-  %exitcond.i = icmp eq i64 %indvar.i.next, %N
-  br i1 %exitcond.i, label %return, label %for.i
-
-return:
-  ret void
-}
@@ -1,50 +0,0 @@
-; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s
-
-; void f(long A[], int N, int *init_ptr) {
-;   long i, j;
-;
-;   for (i = 0; i < N; ++i) {
-;     for (i = 0; i < N; ++i) {
-;       init = *init_ptr;
-;       A[i] = init + 2;
-;     }
-;   }
-; }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-
-define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
-entry:
-
-; CHECK: entry
-; CHECK: br label %for.i
-  br label %for.i
-
-for.i:
-  %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
-  %indvar.i.next = add nsw i64 %indvar.i, 1
-  br label %entry.next
-
-entry.next:
-  br label %for.j
-
-for.j:
-  %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ]
-  %init = load i64, i64* %init_ptr
-  %init_plus_two = add i64 %init, 2
-; The scalar evolution of %init_plus_two is (2 + %init). So we have a
-; non-trivial scalar evolution referring to a value in the same basic block.
-; We want to ensure that this scalar is not translated into a memory copy.
-  %scevgep = getelementptr i64, i64* %A, i64 %indvar.j
-  store i64 %init_plus_two, i64* %scevgep
-  %indvar.j.next = add nsw i64 %indvar.j, 1
-  %exitcond.j = icmp eq i64 %indvar.j.next, %N
-  br i1 %exitcond.j, label %for.i.end, label %for.j
-
-for.i.end:
-  %exitcond.i = icmp eq i64 %indvar.i.next, %N
-  br i1 %exitcond.i, label %return, label %for.i
-
-return:
-  ret void
-}
@@ -1,33 +0,0 @@
-; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALAR
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-define void @phi_nodes_outside() {
-entry:
-  br label %for.i.1
-
-for.i.1:
-  %i.1 = phi i32 [ %i.1.next, %for.i.1 ], [ 0, %entry ]
-  %i.1.next = add nsw i32 %i.1, 1
-  br i1 false, label %for.i.1 , label %for.i.2.preheader
-
-for.i.2.preheader:
-  br label %for.i.2
-
-for.i.2:
-; The value of %i.1.next is used outside of the scop in a PHI node.
-  %i.2 = phi i32 [ %i.2.next , %for.i.2 ], [ %i.1.next, %for.i.2.preheader ]
-  %i.2.next = add nsw i32 %i.2, 1
-  fence seq_cst
-  br i1 false, label %for.i.2, label %cleanup
-
-cleanup:
-  ret void
-}
-
-; SCALAR-NOT: alloca
-
-; SCALAR: for.i.2.preheader:
-; SCALAR-NOT: load
-
-; SCALAR: for.i.2:
-; SCALAR: %i.2 = phi i32 [ %i.2.next, %for.i.2 ], [ %i.1.next, %for.i.2.preheader ]
@@ -1,222 +0,0 @@
-; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS
-; RAUN: opt %loadPolly -basicaa -polly-independent < %s -S | FileCheck %s -check-prefix=SCALARACCESS
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-@A = common global [1024 x float] zeroinitializer, align 8
-
-define i32 @empty() nounwind {
-entry:
-  fence seq_cst
-  br label %for.cond
-
-for.cond:
-  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
-  %exitcond = icmp ne i64 %indvar, 1024
-  br i1 %exitcond, label %for.body, label %return
-
-for.body:
-  br label %for.inc
-
-for.inc:
-  %indvar.next = add i64 %indvar, 1
-  br label %for.cond
-
-return:
-  fence seq_cst
-  ret i32 0
-}
-
-
-; SCALARACCESS-LABEL: @array_access()
-define i32 @array_access() nounwind {
-entry:
-  fence seq_cst
-  br label %for.cond
-; SCALARACCESS: entry:
-; SCALARACCESS-NOT: alloca
-
-for.cond:
-  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
-  %exitcond = icmp ne i64 %indvar, 1024
-  br i1 %exitcond, label %for.body, label %return
-
-for.body:
-  %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
-  %float = uitofp i64 %indvar to float
-  store float %float, float* %arrayidx
-  br label %for.inc
-
-; SCALARACCESS: for.body:
-; SCALARACCESS: %float = uitofp i64 %indvar to float
-; SCALARACCESS: store float %float, float* %arrayidx
-
-for.inc:
-  %indvar.next = add i64 %indvar, 1
-  br label %for.cond
-
-return:
-  fence seq_cst
-  ret i32 0
-}
-
-; SCALARACCESS-LABEL: @intra_scop_dep()
-define i32 @intra_scop_dep() nounwind {
-entry:
-  fence seq_cst
-  br label %for.cond
-
-; SCALARACCESS: entry:
-; SCALARACCESS-NOT: alloca
-; SCALARACCESS: fence
-
-for.cond:
-  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
-  %exitcond = icmp ne i64 %indvar, 1024
-  br i1 %exitcond, label %for.body.a, label %return
-
-for.body.a:
-  %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
-  %scalar = load float, float* %arrayidx
-  br label %for.body.b
-
-; SCALARACCESS: for.body.a:
-; SCALARACCESS: %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
-; SCALARACCESS: %scalar = load float, float* %arrayidx
-; SCALARACCESS-NOT: store
-; SCALARACCESS: br label %for.body.b
-
-for.body.b:
-  %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
-  %float = uitofp i64 %indvar to float
-  %sum = fadd float %scalar, %float
-  store float %sum, float* %arrayidx2
-  br label %for.inc
-
-; SCALARACCESS: for.body.b:
-; SCALARACCESS: %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
-; SCALARACCESS: %float = uitofp i64 %indvar to float
-; SCALARACCESS-NOT: load
-; SCALARACCESS: %sum = fadd float %scalar, %float
-; SCALARACCESS: store float %sum, float* %arrayidx2
-; SCALARACCESS: br label %for.inc
-
-for.inc:
-  %indvar.next = add i64 %indvar, 1
-  br label %for.cond
-
-return:
-  fence seq_cst
-  ret i32 0
-}
-
-; It is not possible to have a scop which accesses a scalar element that is
-; a global variable. All global variables are pointers containing possibly
-; a single element.
-
-; SCALARACCESS-LABEL: @use_after_scop()
-define i32 @use_after_scop() nounwind {
-entry:
-  fence seq_cst
-  br label %for.head
-
-; SCALARACCESS: entry:
-; SCALARACCESS-NOT: alloca
-; SCALARACCESS: fence
-
-for.head:
-  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
-  br label %for.body
-
-for.body:
-  %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
-  %scalar = load float, float* %arrayidx
-  br label %for.inc
-
-; SCALARACCESS: for.body:
-; SCALARACCESS: %scalar = load float, float* %arrayidx
-; SCALARACCESS-NOT: store float %scalar
-
-for.inc:
-  %indvar.next = add i64 %indvar, 1
-  %exitcond = icmp ne i64 %indvar, 1024
-  br i1 %exitcond, label %for.head, label %for.after
-
-for.after:
-  fence seq_cst
-  %return_value = fptosi float %scalar to i32
-  br label %return
-
-; SCALARACCESS: for.after:
-; SCALARACCESS: fence seq_cst
-; SCALARACCESS: %return_value = fptosi float %scalar to i32
-
-return:
-  ret i32 %return_value
-}
-
-; We currently do not transform scalar references, that have only read accesses
-; in the scop. There are two reasons for this:
-;
-; o We don't introduce additional memory references which may yield to compile
-;   time overhead.
-; o For integer values, such a translation may block the use of scalar
-;   evolution on those values.
-;
-; SCALARACCESS-LABEL: @before_scop()
-define i32 @before_scop() nounwind {
-entry:
-  br label %preheader
-
-preheader:
-  %scalar = fadd float 4.0, 5.0
-  fence seq_cst
-  br label %for.cond
-
-for.cond:
-  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ]
-  %exitcond = icmp ne i64 %indvar, 1024
-  br i1 %exitcond, label %for.body, label %return
-
-for.body:
-  %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
-  store float %scalar, float* %arrayidx
-  br label %for.inc
-
-; SCALARACCESS: for.body:
-; SCALARACCESS: store float %scalar, float* %arrayidx
-
-for.inc:
-  %indvar.next = add i64 %indvar, 1
-  br label %for.cond
-
-return:
-  fence seq_cst
-  ret i32 0
-}
-
-; Currently not working
-; SCALARACCESS-LABEL: @param_before_scop(
-define i32 @param_before_scop(float %scalar) nounwind {
-entry:
-  fence seq_cst
-  br label %for.cond
-
-for.cond:
-  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
-  %exitcond = icmp ne i64 %indvar, 1024
-  br i1 %exitcond, label %for.body, label %return
-
-for.body:
-  %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
-  store float %scalar, float* %arrayidx
-  br label %for.inc
-
-for.inc:
-  %indvar.next = add i64 %indvar, 1
-  br label %for.cond
-
-return:
-  fence seq_cst
-  ret i32 0
-}
@@ -6,9 +6,13 @@
 ; A[n % 42] += 1;
 ; }
 ;
-; CHECK: polly.stmt.bb3:
-; CHECK: %p_tmp.moved.to.bb3 = srem i64 %n, 42
-; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %p_tmp.moved.to.bb3
+; CHECK: polly.stmt.bb2:
+; CHECK-NEXT: %p_tmp = srem i64 %n, 42
+; CHECK-NEXT: store i64 %p_tmp, i64* %tmp.s2a
+;
+; CHECK: polly.stmt.bb3:
+; CHECK: %tmp.s2a.reload = load i64, i64* %tmp.s2a
+; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %tmp.s2a.reload
 
 define void @pos(float* %A, i64 %n) {
 bb:
@@ -1,5 +1,5 @@
 ; This should be run without alias analysis enabled.
-;RUN: opt %loadPolly -polly-independent < %s
+;RUN: opt %loadPolly -polly-scops < %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 
 define i32 @main() nounwind {
@@ -1,199 +0,0 @@
-; RUN: opt %loadPolly -polly-independent < %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-define i32 @main() nounwind uwtable readnone {
-  %arr = alloca [100 x i32], align 16
-  br label %1
-
-; <label>:1 ; preds = %1, %0
-  %indvars.iv3 = phi i64 [ 0, %0 ], [ %indvars.iv.next4, %1 ]
-  %2 = getelementptr inbounds [100 x i32], [100 x i32]* %arr, i64 0, i64 %indvars.iv3
-  %3 = trunc i64 %indvars.iv3 to i32
-  store i32 %3, i32* %2, align 4, !tbaa !0
-  %indvars.iv.next4 = add i64 %indvars.iv3, 1
-  %lftr.wideiv5 = trunc i64 %indvars.iv.next4 to i32
-  %exitcond6 = icmp eq i32 %lftr.wideiv5, 100
-  br i1 %exitcond6, label %.preheader, label %1
-
-.preheader: ; preds = %.preheader, %1
-  %indvars.iv = phi i64 [ %indvars.iv.next, %.preheader ], [ 0, %1 ]
-  %4 = getelementptr inbounds [100 x i32], [100 x i32]* %arr, i64 0, i64 %indvars.iv
-  %5 = load i32, i32* %4, align 4, !tbaa !0
-  %6 = xor i32 %5, -1
-  %7 = shl i32 %5, 15
-  %8 = add nsw i32 %7, %6
-  %9 = ashr i32 %8, 12
-  %10 = xor i32 %9, %8
-  %11 = mul i32 %10, 9
-  %12 = ashr i32 %11, 4
-  %13 = xor i32 %12, %11
-  %14 = mul nsw i32 %13, 20571
-  %15 = ashr i32 %14, 16
-  %16 = xor i32 %15, %14
-  %17 = xor i32 %16, -1
-  %18 = shl i32 %16, 15
-  %19 = add nsw i32 %18, %17
-  %20 = ashr i32 %19, 12
-  %21 = xor i32 %20, %19
-  %22 = mul i32 %21, 5
-  %23 = ashr i32 %22, 4
-  %24 = xor i32 %23, %22
-  %25 = mul nsw i32 %24, 20576
-  %26 = ashr i32 %25, 16
-  %27 = xor i32 %26, %25
-  %28 = xor i32 %27, -1
-  %29 = shl i32 %27, 15
-  %30 = add nsw i32 %29, %28
-  %31 = ashr i32 %30, 12
-  %32 = xor i32 %31, %30
-  %33 = mul i32 %32, 5
-  %34 = ashr i32 %33, 4
-  %35 = xor i32 %34, %33
-  %36 = mul nsw i32 %35, 2057
-  %37 = ashr i32 %36, 16
-  %38 = xor i32 %37, %36
-  %39 = xor i32 %38, -1
-  %40 = shl i32 %38, 15
-  %41 = add nsw i32 %40, %39
-  %42 = ashr i32 %41, 12
-  %43 = xor i32 %42, %41
-  %44 = mul i32 %43, 5
-  %45 = ashr i32 %44, 4
-  %46 = xor i32 %45, %44
-  %47 = mul nsw i32 %46, 20572
-  %48 = ashr i32 %47, 16
-  %49 = xor i32 %48, %47
-  %50 = xor i32 %49, -1
-  %51 = shl i32 %49, 15
-  %52 = add nsw i32 %51, %50
-  %53 = ashr i32 %52, 12
-  %54 = xor i32 %53, %52
-  %55 = mul i32 %54, 5
-  %56 = ashr i32 %55, 4
-  %57 = xor i32 %56, %55
-  %58 = mul nsw i32 %57, 2051
-  %59 = ashr i32 %58, 16
-  %60 = xor i32 %59, %58
-  %61 = xor i32 %60, -1
-  %62 = shl i32 %60, 15
-  %63 = add nsw i32 %62, %61
-  %64 = ashr i32 %63, 12
-  %65 = xor i32 %64, %63
-  %66 = mul i32 %65, 5
-  %67 = ashr i32 %66, 4
-  %68 = xor i32 %67, %66
-  %69 = mul nsw i32 %68, 2057
-  %70 = ashr i32 %69, 16
-  %71 = xor i32 %70, %69
-  %72 = xor i32 %71, -1
-  %73 = shl i32 %71, 15
-  %74 = add nsw i32 %73, %72
-  %75 = ashr i32 %74, 12
-  %76 = xor i32 %75, %74
-  %77 = mul i32 %76, 5
-  %78 = ashr i32 %77, 4
-  %79 = xor i32 %78, %77
-  %80 = mul nsw i32 %79, 205
-  %81 = ashr i32 %80, 17
-  %82 = xor i32 %81, %80
-  %83 = xor i32 %82, -1
-  %84 = shl i32 %82, 15
-  %85 = add nsw i32 %84, %83
-  %86 = ashr i32 %85, 12
-  %87 = xor i32 %86, %85
-  %88 = mul i32 %87, 5
-  %89 = ashr i32 %88, 4
-  %90 = xor i32 %89, %88
-  %91 = mul nsw i32 %90, 2057
-  %92 = ashr i32 %91, 16
-  %93 = xor i32 %92, %91
-  %94 = xor i32 %93, -1
-  %95 = shl i32 %93, 15
-  %96 = add nsw i32 %95, %94
-  %97 = ashr i32 %96, 12
-  %98 = xor i32 %97, %96
-  %99 = mul i32 %98, 5
-  %100 = ashr i32 %99, 3
-  %101 = xor i32 %100, %99
-  %102 = mul nsw i32 %101, 20571
-  %103 = ashr i32 %102, 16
-  %104 = xor i32 %103, %102
-  %105 = xor i32 %104, -1
-  %106 = shl i32 %104, 15
-  %107 = add nsw i32 %106, %105
-  %108 = ashr i32 %107, 12
-  %109 = xor i32 %108, %107
-  %110 = mul i32 %109, 5
-  %111 = ashr i32 %110, 4
-  %112 = xor i32 %111, %110
-  %113 = mul nsw i32 %112, 2057
-  %114 = ashr i32 %113, 16
-  %115 = xor i32 %114, %113
-  %116 = xor i32 %115, -1
-  %117 = shl i32 %115, 15
-  %118 = add nsw i32 %117, %116
-  %119 = ashr i32 %118, 12
-  %120 = xor i32 %119, %118
-  %121 = mul i32 %120, 5
-  %122 = ashr i32 %121, 4
-  %123 = xor i32 %122, %121
-  %124 = mul nsw i32 %123, 20572
-  %125 = ashr i32 %124, 16
-  %126 = xor i32 %125, %124
-  %127 = xor i32 %126, -1
-  %128 = shl i32 %126, 15
-  %129 = add nsw i32 %128, %127
-  %130 = ashr i32 %129, 12
-  %131 = xor i32 %130, %129
-  %132 = mul i32 %131, 5
-  %133 = ashr i32 %132, 4
-  %134 = xor i32 %133, %132
-  %135 = mul nsw i32 %134, 2057
-  %136 = ashr i32 %135, 16
-  %137 = xor i32 %136, %135
-  %138 = xor i32 %137, -1
-  %139 = shl i32 %137, 15
-  %140 = add nsw i32 %139, %138
-  %141 = ashr i32 %140, 12
-  %142 = xor i32 %141, %140
-  %143 = mul i32 %142, 5
-  %144 = ashr i32 %143, 4
-  %145 = xor i32 %144, %143
-  %146 = mul nsw i32 %145, 2057
-  %147 = ashr i32 %146, 16
-  %148 = xor i32 %147, %146
-  %149 = xor i32 %148, -1
-  %150 = shl i32 %148, 15
-  %151 = add nsw i32 %150, %149
-  %152 = ashr i32 %151, 12
-  %153 = xor i32 %152, %151
-  %154 = mul i32 %153, 5
-  %155 = ashr i32 %154, 4
-  %156 = xor i32 %155, %154
-  %157 = mul nsw i32 %156, 2057
-  %158 = ashr i32 %157, 16
-  %159 = xor i32 %158, %157
-  %160 = xor i32 %159, -1
-  %161 = shl i32 %159, 15
-  %162 = add nsw i32 %161, %160
-  %163 = ashr i32 %162, 12
-  %164 = xor i32 %163, %162
-  %165 = mul i32 %164, 5
-  %166 = ashr i32 %165, 4
-  %167 = xor i32 %166, %165
-  %168 = mul nsw i32 %167, 2057
-  %169 = ashr i32 %168, 16
-  %170 = xor i32 %169, %168
-  store i32 %170, i32* %4, align 4, !tbaa !0
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, 100
-  br i1 %exitcond, label %171, label %.preheader
-
-; <label>:171 ; preds = %.preheader
-  ret i32 0
-}
-
-!0 = !{!"int", !1}
-!1 = !{!"omnipotent char", !2}
-!2 = !{!"Simple C/C++ TBAA", null}
@@ -6,11 +6,13 @@
 ; A[i]++;
 ; }
 ;
+; FIXME: This test is a negative test until we have an independent blocks alternative.
+;
 ; We should move operands as close to their use as possible, hence in this case
 ; there should not be any scalar dependence anymore after %cmp1 is moved to
 ; %for.body (%c and %indvar.iv are synthesis able).
 ;
-; CHECK-NOT: [Scalar: 1]
+; CHECK: [Scalar: 1]
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -1,4 +1,7 @@
-; RUN: opt %loadPolly -polly-independent < %s
+; RUN: opt %loadPolly -polly-scops < %s
+;
+; CHECK: Region: %if.then6---%return
+;
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 define void @arc_either() {