[ScopInliner] Add a simple Scop-based inliner to polly.

We add a ScopInliner pass which inlines functions based on a simple heuristic:
Let `g` call `f`.
If we can model all of `f` as a Scop, we inline `f` into `g`.

This requires `-polly-detect-full-functions` to be enabled. So, the pass
aborts with a fatal error unless `-polly-detect-full-functions` is enabled.

Differential Revision: https://reviews.llvm.org/D36832

llvm-svn: 311126
This commit is contained in:
Siddharth Bhat 2017-08-17 21:57:23 +00:00
parent e608ef7635
commit b46847c035
9 changed files with 278 additions and 7 deletions

View File

@ -32,6 +32,7 @@ class RegionPass;
namespace polly {
llvm::Pass *createCodePreparationPass();
llvm::Pass *createScopInlinerPass();
llvm::Pass *createDeadCodeElimPass();
llvm::Pass *createDependenceInfoPass();
llvm::Pass *createDependenceInfoWrapperPassPass();
@ -108,6 +109,7 @@ struct PollyForcePassLinking {
namespace llvm {
class PassRegistry;
void initializeCodePreparationPass(llvm::PassRegistry &);
void initializeScopInlinerPass(llvm::PassRegistry &);
void initializeDeadCodeElimPass(llvm::PassRegistry &);
void initializeJSONExporterPass(llvm::PassRegistry &);
void initializeJSONImporterPass(llvm::PassRegistry &);

View File

@ -113,6 +113,7 @@ extern bool PollyUseRuntimeAliasChecks;
extern bool PollyProcessUnprofitable;
extern bool PollyInvariantLoadHoisting;
extern bool PollyAllowUnsignedOperations;
extern bool PollyAllowFullFunction;
/// A function attribute which will cause Polly to skip the function
extern llvm::StringRef PollySkipFnAttr;

View File

@ -1,4 +1,3 @@
//===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -107,10 +106,12 @@ static cl::list<std::string> IgnoredFunctions(
"ANY of the regexes provided."),
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
static cl::opt<bool>
AllowFullFunction("polly-detect-full-functions",
cl::desc("Allow the detection of full functions"),
cl::init(false), cl::cat(PollyCategory));
bool polly::PollyAllowFullFunction;
static cl::opt<bool, true>
XAllowFullFunction("polly-detect-full-functions",
cl::desc("Allow the detection of full functions"),
cl::location(polly::PollyAllowFullFunction),
cl::init(false), cl::cat(PollyCategory));
static cl::opt<std::string> OnlyRegion(
"polly-only-region",
@ -1541,7 +1542,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
if (!AllowFullFunction && CurRegion.isTopLevelRegion()) {
if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
DEBUG(dbgs() << "Top level region is invalid\n");
return false;
}
@ -1564,7 +1565,7 @@ bool ScopDetection::isValidRegion(DetectionContext &Context) const {
// SCoP cannot contain the entry block of the function, because we need
// to insert alloca instruction there when translate scalar to array.
if (!AllowFullFunction &&
if (!PollyAllowFullFunction &&
CurRegion.getEntry() ==
&(CurRegion.getEntry()->getParent()->getEntryBlock()))
return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());

View File

@ -65,6 +65,7 @@ add_library(PollyCore OBJECT
Transform/Simplify.cpp
Transform/MaximalStaticExpansion.cpp
Transform/RewriteByReferenceParameters.cpp
Transform/ScopInliner.cpp
${POLLY_HEADER_FILES}
)
set_target_properties(PollyCore PROPERTIES FOLDER "Polly")

View File

@ -264,6 +264,7 @@ void initializePollyPasses(PassRegistry &Registry) {
initializePollyCanonicalizePass(Registry);
initializePolyhedralInfoPass(Registry);
initializeScopDetectionWrapperPassPass(Registry);
initializeScopInlinerPass(Registry);
initializeScopInfoRegionPassPass(Registry);
initializeScopInfoWrapperPassPass(Registry);
initializeRewriteByrefParamsPass(Registry);

View File

@ -0,0 +1,119 @@
//===---- ScopInliner.cpp - Polyhedral based inliner ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Take a SCC and:
// 1. If it has more than one component, bail out (contains cycles)
// 2. If it has just one component, and if the function is entirely a scop,
// inline it.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "polly-scop-inliner"
#include "polly/LinkAllPasses.h"
#include "polly/RegisterPasses.h"
#include "polly/ScopDetection.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
using namespace polly;
extern bool polly::PollyAllowFullFunction;
namespace {
/// Call-graph SCC pass that inlines functions which Polly can model entirely
/// as a Scop.
///
/// For every trivial (single-function) SCC, the pass runs Polly's
/// ScopDetection on the function. If the function's top-level region is a
/// valid Scop, the function is marked `AlwaysInline` and the AlwaysInliner is
/// run over the enclosing module.
class ScopInliner : public CallGraphSCCPass {
public:
  static char ID;

  ScopInliner() : CallGraphSCCPass(ID) {}

  /// Abort with a fatal error unless full-function Scop detection
  /// (`-polly-detect-full-functions`) is enabled, because the heuristic in
  /// runOnSCC() relies on the top-level region being detectable as a Scop.
  bool doInitialization(CallGraph &CG) override {
    if (!polly::PollyAllowFullFunction) {
      report_fatal_error(
          "Aborting from ScopInliner because it only makes sense to run with "
          "-polly-detect-full-functions. "
          "The heuristic for ScopInliner checks that the full function is a "
          "Scop, which happens if and only if -polly-detect-full-functions is "
          "enabled. "
          "If not, the entry block is not included in the Scop");
    }
    return true;
  }

  /// Inline the single function of \p SCC if its whole body is a Scop.
  ///
  /// \returns true if the function was marked `AlwaysInline` and the inliner
  ///          was run (i.e. the IR was modified), false otherwise.
  bool runOnSCC(CallGraphSCC &SCC) override {
    // We do not try to inline non-trivial SCCs because this would lead to
    // "infinite" inlining if we are not careful.
    if (SCC.size() > 1)
      return false;
    assert(SCC.size() == 1 && "found empty SCC");
    Function *F = (*SCC.begin())->getFunction();

    // Nothing to do if the call graph node has no function, or if the
    // function is only a declaration (there is no body to analyze).
    if (!F)
      return false;
    if (F->isDeclaration()) {
      DEBUG(dbgs() << "Skipping " << F->getName()
                   << " because it is a declaration.\n");
      return false;
    }

    // Use a private new-pass-manager analysis manager to compute the region
    // info and the Scop detection result for this function.
    PassBuilder PB;
    FunctionAnalysisManager FAM;
    FAM.registerPass([] { return ScopAnalysis(); });
    PB.registerFunctionAnalyses(FAM);

    RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F);
    ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F);

    const bool HasScopAsTopLevelRegion =
        SD.ValidRegions.count(RI.getTopLevelRegion()) > 0;

    bool Changed = false;
    if (HasScopAsTopLevelRegion) {
      DEBUG(dbgs() << F->getName()
                   << " has scop as top level region; inlining it\n");
      F->addFnAttr(llvm::Attribute::AlwaysInline);

      ModuleAnalysisManager MAM;
      PB.registerModuleAnalyses(MAM);
      ModulePassManager MPM;
      MPM.addPass(AlwaysInlinerPass());
      Module *M = F->getParent();
      assert(M && "Function has illegal module");
      MPM.run(*M, MAM);

      // The attribute was added and the inliner ran over the module, so the
      // IR must be reported as modified.
      Changed = true;
    } else {
      DEBUG(dbgs() << F->getName()
                   << " does NOT have scop as top level region\n");
    }

    return Changed;
  }

  /// Only the analyses required by CallGraphSCCPass itself are requested.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    CallGraphSCCPass::getAnalysisUsage(AU);
  }
};
} // namespace
// Definition of the static pass identifier declared in ScopInliner; it is
// handed to the CallGraphSCCPass constructor by ScopInliner's constructor.
char ScopInliner::ID;
/// Allocate a fresh ScopInliner pass and return it as a generic Pass pointer.
Pass *polly::createScopInlinerPass() { return new ScopInliner(); }
// Register ScopInliner with the pass registry under the command-line name
// "polly-scop-inliner". No pass dependencies are declared between the BEGIN
// and END macros.
// NOTE(review): the two trailing `false` arguments are presumably the macros'
// "CFG only" and "is analysis" flags -- confirm against llvm/PassSupport.h.
INITIALIZE_PASS_BEGIN(
ScopInliner, "polly-scop-inliner",
"inline functions based on how much of the function is a scop.", false,
false)
INITIALIZE_PASS_END(
ScopInliner, "polly-scop-inliner",
"inline functions based on how much of the function is a scop.", false,
false)

View File

@ -0,0 +1,8 @@
; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
; RUN: -polly-scops -analyze < %s
; Check that we do not crash if there are declares. We should skip function
; declarations and not try to query for domtree.
declare void @foo()

View File

@ -0,0 +1,76 @@
; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
; RUN: -polly-scops -analyze -polly-invariant-load-hoisting < %s | FileCheck %s
; Check that we inline a function that requires invariant load hoisting
; correctly.
; CHECK: Max Loop Depth: 2
; REQUIRES: pollyacc
; void to_be_inlined(int A[], int *begin, int *end) {
; for(int i = *begin; i < *end; i++) {
; A[i] = 10;
; }
; }
;
; static const int N = 1000;
;
; void inline_site(int A[], int *begin, int *end) {
; for(int i = 0; i < N; i++)
; to_be_inlined(A, begin, end);
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
; Callee: stores 10 to A[i] for i starting at *begin, as long as i < *end.
; Both bounds are loaded from memory, and *end is re-loaded inside the loop
; body on every iteration.
define void @to_be_inlined(i32* %A, i32* %begin, i32* %end) {
entry:
br label %entry.split
entry.split: ; preds = %entry
; Load the loop bounds and skip the loop entirely if *begin >= *end.
%tmp = load i32, i32* %begin, align 4
%tmp21 = load i32, i32* %end, align 4
%cmp3 = icmp slt i32 %tmp, %tmp21
br i1 %cmp3, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry.split
%tmp1 = sext i32 %tmp to i64
br label %for.body
for.body: ; preds = %for.body.lr.ph, %for.body
%indvars.iv4 = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv4
store i32 10, i32* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv4, 1
; The upper bound is re-loaded from %end before the exit comparison.
%tmp2 = load i32, i32* %end, align 4
%tmp3 = sext i32 %tmp2 to i64
%cmp = icmp slt i64 %indvars.iv.next, %tmp3
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %for.body
br label %for.end
for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
ret void
}
; Caller: a loop counting from 0 to 1000 that calls
; @to_be_inlined(A, begin, end) once per iteration.
define void @inline_site(i32* %A, i32* %begin, i32 *%end) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
for.body: ; preds = %entry.split, %for.body
%i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
tail call void @to_be_inlined(i32* %A, i32* %begin, i32* %end)
%inc = add nuw nsw i32 %i.01, 1
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}

View File

@ -0,0 +1,62 @@
; RUN: opt %loadPolly -polly-detect-full-functions -polly-scop-inliner \
; RUN: -polly-scops -analyze < %s | FileCheck %s
; Check that we get the 2 nested loops by inlining `to_be_inlined` into
; `inline_site`.
; CHECK: Max Loop Depth: 2
; static const int N = 1000;
;
; void to_be_inlined(int A[]) {
; for(int i = 0; i < N; i++)
; A[i] *= 10;
; }
;
; void inline_site(int A[]) {
; for(int i = 0; i < N; i++)
; to_be_inlined(A);
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.12.0"
; Callee: multiplies A[i] by 10 in place for i = 0 .. 999.
define void @to_be_inlined(i32* %A) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
for.body: ; preds = %entry.split, %for.body
%indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
; Load A[i], scale it by 10 and store it back to the same address.
%tmp = load i32, i32* %arrayidx, align 4
%mul = mul nsw i32 %tmp, 10
store i32 %mul, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1000
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}
; Caller: a loop counting from 0 to 1000 that calls @to_be_inlined(A) once per
; iteration. The RUN line's CHECK expects a loop depth of 2 after inlining.
define void @inline_site(i32* %A) {
entry:
br label %entry.split
entry.split: ; preds = %entry
br label %for.body
for.body: ; preds = %entry.split, %for.body
%i.01 = phi i32 [ 0, %entry.split ], [ %inc, %for.body ]
tail call void @to_be_inlined(i32* %A)
%inc = add nuw nsw i32 %i.01, 1
%exitcond = icmp eq i32 %inc, 1000
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}