forked from OSchip/llvm-project
[ScopInliner] Add a simple Scop-based inliner to polly.
We add a ScopInliner pass which inlines functions based on a simple heuristic: Let `g` call `f`. If we can model all of `f` as a Scop, we inline `f` into `g`. This requires `-polly-detect-full-functions` to be enabled, so the pass aborts with a fatal error if `-polly-detect-full-functions` is not enabled. Differential Revision: https://reviews.llvm.org/D36832 llvm-svn: 311126
This commit is contained in:
parent
e608ef7635
commit
b46847c035
|
@ -32,6 +32,7 @@ class RegionPass;
|
|||
|
||||
namespace polly {
|
||||
llvm::Pass *createCodePreparationPass();
|
||||
llvm::Pass *createScopInlinerPass();
|
||||
llvm::Pass *createDeadCodeElimPass();
|
||||
llvm::Pass *createDependenceInfoPass();
|
||||
llvm::Pass *createDependenceInfoWrapperPassPass();
|
||||
|
@ -108,6 +109,7 @@ struct PollyForcePassLinking {
|
|||
namespace llvm {
|
||||
class PassRegistry;
|
||||
void initializeCodePreparationPass(llvm::PassRegistry &);
|
||||
void initializeScopInlinerPass(llvm::PassRegistry &);
|
||||
void initializeDeadCodeElimPass(llvm::PassRegistry &);
|
||||
void initializeJSONExporterPass(llvm::PassRegistry &);
|
||||
void initializeJSONImporterPass(llvm::PassRegistry &);
|
||||
|
|
|
@ -113,6 +113,7 @@ extern bool PollyUseRuntimeAliasChecks;
|
|||
extern bool PollyProcessUnprofitable;
|
||||
extern bool PollyInvariantLoadHoisting;
|
||||
extern bool PollyAllowUnsignedOperations;
|
||||
extern bool PollyAllowFullFunction;
|
||||
|
||||
/// A function attribute which will cause Polly to skip the function
|
||||
extern llvm::StringRef PollySkipFnAttr;
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
//===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -107,10 +106,12 @@ static cl::list<std::string> IgnoredFunctions(
|
|||
"ANY of the regexes provided."),
|
||||
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
AllowFullFunction("polly-detect-full-functions",
|
||||
cl::desc("Allow the detection of full functions"),
|
||||
cl::init(false), cl::cat(PollyCategory));
|
||||
bool polly::PollyAllowFullFunction;
|
||||
static cl::opt<bool, true>
|
||||
XAllowFullFunction("polly-detect-full-functions",
|
||||
cl::desc("Allow the detection of full functions"),
|
||||
cl::location(polly::PollyAllowFullFunction),
|
||||
cl::init(false), cl::cat(PollyCategory));
|
||||
|
||||
static cl::opt<std::string> OnlyRegion(
|
||||
"polly-only-region",
|
||||
|
@ -1541,7 +1542,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
|
|||
|
||||
DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
|
||||
|
||||
if (!AllowFullFunction && CurRegion.isTopLevelRegion()) {
|
||||
if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
|
||||
DEBUG(dbgs() << "Top level region is invalid\n");
|
||||
return false;
|
||||
}
|
||||
|
@ -1564,7 +1565,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
|
|||
|
||||
// SCoP cannot contain the entry block of the function, because we need
|
||||
// to insert alloca instruction there when translate scalar to array.
|
||||
if (!AllowFullFunction &&
|
||||
if (!PollyAllowFullFunction &&
|
||||
CurRegion.getEntry() ==
|
||||
&(CurRegion.getEntry()->getParent()->getEntryBlock()))
|
||||
return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
|
||||
|
|
|
@ -65,6 +65,7 @@ add_library(PollyCore OBJECT
|
|||
Transform/Simplify.cpp
|
||||
Transform/MaximalStaticExpansion.cpp
|
||||
Transform/RewriteByReferenceParameters.cpp
|
||||
Transform/ScopInliner.cpp
|
||||
${POLLY_HEADER_FILES}
|
||||
)
|
||||
set_target_properties(PollyCore PROPERTIES FOLDER "Polly")
|
||||
|
|
|
@ -264,6 +264,7 @@ void initializePollyPasses(PassRegistry &Registry) {
|
|||
initializePollyCanonicalizePass(Registry);
|
||||
initializePolyhedralInfoPass(Registry);
|
||||
initializeScopDetectionWrapperPassPass(Registry);
|
||||
initializeScopInlinerPass(Registry);
|
||||
initializeScopInfoRegionPassPass(Registry);
|
||||
initializeScopInfoWrapperPassPass(Registry);
|
||||
initializeRewriteByrefParamsPass(Registry);
|
||||
|
|
|
@ -0,0 +1,119 @@
|
|||
//===---- ScopInliner.cpp - Polyhedral based inliner ----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Take a SCC and:
|
||||
// 1. If it contains more than one function, bail out (the SCC has a call cycle)
|
||||
// 2. If it contains just one function, and that function is entirely a scop,
|
||||
// inline it.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "polly-scop-inliner"
|
||||
|
||||
#include "polly/LinkAllPasses.h"
|
||||
#include "polly/RegisterPasses.h"
|
||||
#include "polly/ScopDetection.h"
|
||||
#include "llvm/Analysis/CallGraphSCCPass.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Passes/PassBuilder.h"
|
||||
#include "llvm/Transforms/IPO/AlwaysInliner.h"
|
||||
|
||||
using namespace polly;
|
||||
extern bool polly::PollyAllowFullFunction;
|
||||
|
||||
namespace {
|
||||
class ScopInliner : public CallGraphSCCPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
ScopInliner() : CallGraphSCCPass(ID) {}
|
||||
|
||||
bool doInitialization(CallGraph &CG) override {
|
||||
if (!polly::PollyAllowFullFunction) {
|
||||
report_fatal_error(
|
||||
"Aborting from ScopInliner because it only makes sense to run with "
|
||||
"-polly-allow-full-function. "
|
||||
"The heurtistic for ScopInliner checks that the full function is a "
|
||||
"Scop, which happens if and only if polly-allow-full-function is "
|
||||
" enabled. "
|
||||
" If not, the entry block is not included in the Scop");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool runOnSCC(CallGraphSCC &SCC) override {
|
||||
// We do not try to inline non-trivial SCCs because this would lead to
|
||||
// "infinite" inlining if we are not careful.
|
||||
if (SCC.size() > 1)
|
||||
return false;
|
||||
assert(SCC.size() == 1 && "found empty SCC");
|
||||
Function *F = (*SCC.begin())->getFunction();
|
||||
|
||||
// If the function is a nullptr, or the function is a declaration.
|
||||
if (!F)
|
||||
return false;
|
||||
if (F->isDeclaration()) {
|
||||
DEBUG(dbgs() << "Skipping " << F->getName()
|
||||
<< "because it is a declaration.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
PassBuilder PB;
|
||||
FunctionAnalysisManager FAM;
|
||||
FAM.registerPass([] { return ScopAnalysis(); });
|
||||
PB.registerFunctionAnalyses(FAM);
|
||||
|
||||
RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F);
|
||||
ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F);
|
||||
|
||||
const bool HasScopAsTopLevelRegion =
|
||||
SD.ValidRegions.count(RI.getTopLevelRegion()) > 0;
|
||||
|
||||
if (HasScopAsTopLevelRegion) {
|
||||
DEBUG(dbgs() << "Skipping " << F->getName()
|
||||
<< " has scop as top level region");
|
||||
F->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||
|
||||
ModuleAnalysisManager MAM;
|
||||
PB.registerModuleAnalyses(MAM);
|
||||
ModulePassManager MPM;
|
||||
MPM.addPass(AlwaysInlinerPass());
|
||||
Module *M = F->getParent();
|
||||
assert(M && "Function has illegal module");
|
||||
MPM.run(*M, MAM);
|
||||
} else {
|
||||
DEBUG(dbgs() << F->getName()
|
||||
<< " does NOT have scop as top level region\n");
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
CallGraphSCCPass::getAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
char ScopInliner::ID;
|
||||
|
||||
/// Allocate a fresh ScopInliner and hand it back as a generic Pass pointer.
Pass *polly::createScopInlinerPass() { return new ScopInliner(); }
|
||||
|
||||
// Pass-registration macros for ScopInliner, using the argument string
// "polly-scop-inliner" and the description below. Nothing is listed between
// the BEGIN and END invocations, and both trailing boolean arguments are
// false.
INITIALIZE_PASS_BEGIN(
|
||||
ScopInliner, "polly-scop-inliner",
|
||||
"inline functions based on how much of the function is a scop.", false,
|
||||
false)
|
||||
INITIALIZE_PASS_END(
|
||||
ScopInliner, "polly-scop-inliner",
|
||||
"inline functions based on how much of the function is a scop.", false,
|
||||
false)
|
|
@ -0,0 +1,8 @@
|
|||
; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
|
||||
; RUN: -polly-scops -analyze < %s
|
||||
|
||||
; Check that we do not crash if there are declarations. We should skip function
|
||||
; declarations and not try to query for domtree.
|
||||
|
||||
declare void @foo()
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
|
||||
; RUN: -polly-scops -analyze -polly-invariant-load-hoisting < %s | FileCheck %s
|
||||
|
||||
; Check that we inline a function that requires invariant load hoisting
|
||||
; correctly.
|
||||
; CHECK: Max Loop Depth: 2
|
||||
|
||||
; REQUIRES: pollyacc
|
||||
|
||||
|
||||
; void to_be_inlined(int A[], int *begin, int *end) {
|
||||
; for(int i = *begin; i < *end; i++) {
|
||||
; A[i] = 10;
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; static const int N = 1000;
|
||||
;
|
||||
; void inline_site(int A[], int *begin, int *end) {
|
||||
; for(int i = 0; i < N; i++)
|
||||
; to_be_inlined(A, begin, end);
|
||||
; }
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.12.0"
|
||||
|
||||
define void @to_be_inlined(i32* %A, i32* %begin, i32* %end) {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
entry.split: ; preds = %entry
|
||||
%tmp = load i32, i32* %begin, align 4
|
||||
%tmp21 = load i32, i32* %end, align 4
|
||||
%cmp3 = icmp slt i32 %tmp, %tmp21
|
||||
br i1 %cmp3, label %for.body.lr.ph, label %for.end
|
||||
|
||||
for.body.lr.ph: ; preds = %entry.split
|
||||
%tmp1 = sext i32 %tmp to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.lr.ph, %for.body
|
||||
%indvars.iv4 = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv4
|
||||
store i32 10, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv4, 1
|
||||
%tmp2 = load i32, i32* %end, align 4
|
||||
%tmp3 = sext i32 %tmp2 to i64
|
||||
%cmp = icmp slt i64 %indvars.iv.next, %tmp3
|
||||
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
|
||||
|
||||
for.cond.for.end_crit_edge: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
define void @inline_site(i32* %A, i32* %begin, i32 *%end) {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
entry.split: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry.split, %for.body
|
||||
%i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
|
||||
tail call void @to_be_inlined(i32* %A, i32* %begin, i32* %end)
|
||||
%inc = add nuw nsw i32 %i.01, 1
|
||||
%exitcond = icmp eq i32 %inc, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
|
||||
; RUN: -polly-scops -analyze < %s | FileCheck %s
|
||||
|
||||
; Check that we get the 2 nested loops by inlining `to_be_inlined` into
|
||||
; `inline_site`.
|
||||
; CHECK: Max Loop Depth: 2
|
||||
|
||||
; static const int N = 1000;
|
||||
;
|
||||
; void to_be_inlined(int A[]) {
|
||||
; for(int i = 0; i < N; i++)
|
||||
; A[i] *= 10;
|
||||
; }
|
||||
;
|
||||
; void inline_site(int A[]) {
|
||||
; for(int i = 0; i < N; i++)
|
||||
; to_be_inlined(A);
|
||||
; }
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.12.0"
|
||||
|
||||
|
||||
define void @to_be_inlined(i32* %A) {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
entry.split: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry.split, %for.body
|
||||
%indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
|
||||
%tmp = load i32, i32* %arrayidx, align 4
|
||||
%mul = mul nsw i32 %tmp, 10
|
||||
store i32 %mul, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @inline_site(i32* %A) {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
entry.split: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry.split, %for.body
|
||||
%i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
|
||||
tail call void @to_be_inlined(i32* %A)
|
||||
%inc = add nuw nsw i32 %i.01, 1
|
||||
%exitcond = icmp eq i32 %inc, 1000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue