forked from OSchip/llvm-project
[LoopVersioning] Expose loop versioning as a pass too
Summary: LoopVersioning is a transform utility that transform passes can use to disambiguate may-aliasing accesses at run time. I'd like to also expose it as a pass so that it can be unit-tested. I am planning to add support for non-aliasing annotations in LoopVersioning and I'd like to be able to write tests directly using this pass. (After that feature is done, the pass could also be used to look for optimization opportunities that are hidden behind incomplete alias information at compile time.) The pass drives LoopVersioning in its default way, which is to fully disambiguate may-aliasing accesses no matter how many checks are required. Reviewers: hfinkel, ashutosh.nema, sbaranga Subscribers: zzheng, mssimpso, llvm-commits, sanjoy Differential Revision: http://reviews.llvm.org/D16612 llvm-svn: 259610
This commit is contained in:
parent
59b982e1be
commit
d52ed84160
|
@ -314,6 +314,7 @@ void initializeDemandedBitsPass(PassRegistry&);
|
|||
void initializeFuncletLayoutPass(PassRegistry &);
|
||||
void initializeLoopLoadEliminationPass(PassRegistry&);
|
||||
void initializeFunctionImportPassPass(PassRegistry &);
|
||||
void initializeLoopVersioningPassPass(PassRegistry &);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -483,6 +483,12 @@ FunctionPass *createLoopLoadEliminationPass();
|
|||
//
|
||||
Pass *createLoopSimplifyCFGPass();
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// LoopVersioning - Perform loop multi-versioning.
// Creates a function pass that versions inner-most loops, adding the memory
// checks needed to remove may-aliasing accesses from the loop.
// Returns a newly allocated pass object.
// NOTE(review): presumably the pass manager takes ownership of the returned
// pointer -- confirm against the caller.
|
||||
//
|
||||
FunctionPass *createLoopVersioningPass();
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
|
|
@ -85,6 +85,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
|||
initializeLoopDistributePass(Registry);
|
||||
initializeLoopLoadEliminationPass(Registry);
|
||||
initializeLoopSimplifyCFGPass(Registry);
|
||||
initializeLoopVersioningPassPass(Registry);
|
||||
}
|
||||
|
||||
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
|
||||
|
|
|
@ -145,3 +145,77 @@ void LoopVersioning::addPHINodes(
|
|||
PN->addIncoming(NonVersionedLoopInst, NonVersionedLoop->getExitingBlock());
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// \brief Also expose this is a pass. Currently this is only used for
|
||||
/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
|
||||
/// array accesses from the loop.
|
||||
class LoopVersioningPass : public FunctionPass {
|
||||
public:
|
||||
LoopVersioningPass() : FunctionPass(ID) {
|
||||
initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnFunction(Function &F) override {
|
||||
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
||||
auto *LAA = &getAnalysis<LoopAccessAnalysis>();
|
||||
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
|
||||
|
||||
// Build up a worklist of inner-loops to version. This is necessary as the
|
||||
// act of versioning a loop creates new loops and can invalidate iterators
|
||||
// across the loops.
|
||||
SmallVector<Loop *, 8> Worklist;
|
||||
|
||||
for (Loop *TopLevelLoop : *LI)
|
||||
for (Loop *L : depth_first(TopLevelLoop))
|
||||
// We only handle inner-most loops.
|
||||
if (L->empty())
|
||||
Worklist.push_back(L);
|
||||
|
||||
// Now walk the identified inner loops.
|
||||
bool Changed = false;
|
||||
for (Loop *L : Worklist) {
|
||||
const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
|
||||
if (LAI.getNumRuntimePointerChecks() ||
|
||||
!LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
|
||||
LoopVersioning LVer(LAI, L, LI, DT, SE);
|
||||
LVer.versionLoop();
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<LoopInfoWrapperPass>();
|
||||
AU.addPreserved<LoopInfoWrapperPass>();
|
||||
AU.addRequired<LoopAccessAnalysis>();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
AU.addRequired<ScalarEvolutionWrapperPass>();
|
||||
}
|
||||
|
||||
static char ID;
|
||||
};
|
||||
}
|
||||
|
||||
#define LVER_OPTION "loop-versioning"
|
||||
#define DEBUG_TYPE LVER_OPTION
|
||||
|
||||
char LoopVersioningPass::ID;
|
||||
static const char LVer_name[] = "Loop Versioning";
|
||||
|
||||
INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
||||
INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
|
||||
|
||||
namespace llvm {
|
||||
FunctionPass *createLoopVersioningPass() {
|
||||
return new LoopVersioningPass();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Version this loop with overlap checks between a, c and b, c.
|
||||
|
||||
; Computes c[i] = a[i] * b[i] for i in [0, 20). The stores through %c may
; alias the loads through %a and %b, so the loop is expected to be versioned.
define void @f(i32* %a, i32* %b, i32* %c) {
entry:
  br label %for.body

; The run-time check block must contain exactly four comparisons before the
; branch that selects between the original and the versioned loop.
; CHECK: for.body.lver.check:
; CHECK: icmp
; CHECK: icmp
; CHECK: icmp
; CHECK: icmp
; CHECK-NOT: icmp
; CHECK: br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph

; Both copies of the loop must be present: the ".lver.orig" fallback first,
; then the versioned loop, both exiting to for.end.
; CHECK: for.body.ph.lver.orig:
; CHECK: for.body.lver.orig:
; CHECK: br i1 %exitcond.lver.orig, label %for.end, label %for.body.lver.orig
; CHECK: for.body.ph:
; CHECK: for.body:
; CHECK: br i1 %exitcond, label %for.end, label %for.body
; CHECK: for.end:

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  ; Load a[ind].
  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
  %loadA = load i32, i32* %arrayidxA, align 4

  ; Load b[ind].
  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
  %loadB = load i32, i32* %arrayidxB, align 4

  %mulC = mul i32 %loadA, %loadB

  ; Store the product to c[ind].
  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
  store i32 %mulC, i32* %arrayidxC, align 4

  ; Advance the induction variable; leave the loop after 20 iterations.
  %add = add nuw nsw i64 %ind, 1
  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
|
Loading…
Reference in New Issue