[PruneUnprofitable] Add -polly-prune-unprofitable pass.

ScopInfo's normal profitability heuristic considers SCoPs where all
statements have scalar writes as not profitably optimizable and
invalidate the SCoP in that case. However, -polly-delicm and
-polly-simplify may be able to remove some of the scalar writes such
that the flag -polly-unprofitable-scalar-accs=false allows disabling
that part of the heuristic.

In cases where DeLICM (or other passes after ScopInfo) are not
successful in removing scalar writes, the SCoP is still not profitably
optimizable. The schedule optimizer would again try computing another
schedule, resulting in slower compilation.

The -polly-prune-unprofitable pass applies the profitability heuristic
again before the schedule optimizer Polly can still bail out even with
-polly-unprofitable-scalar-accs=false.

Differential Revision: https://reviews.llvm.org/D31033

llvm-svn: 298080
This commit is contained in:
Michael Kruse 2017-03-17 13:09:52 +00:00
parent 72dc195d78
commit f3091bf4cf
8 changed files with 180 additions and 6 deletions

View File

@ -16,6 +16,7 @@
#define POLLY_LINKALLPASSES_H
#include "polly/Config/config.h"
#include "polly/PruneUnprofitable.h"
#include "polly/Simplify.h"
#include "polly/Support/DumpModulePass.h"
#include "llvm/ADT/StringRef.h"
@ -89,6 +90,7 @@ struct PollyForcePassLinking {
polly::createDeLICMPass();
polly::createDumpModulePass("", true);
polly::createSimplifyPass();
polly::createPruneUnprofitablePass();
}
} PollyForcePassLinking; // Force link by creating a global definition.
} // namespace

View File

@ -0,0 +1,30 @@
//===- PruneUnprofitable.h --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Mark a SCoP as unfeasible if not deemed profitable to optimize.
//
//===----------------------------------------------------------------------===//
#ifndef POLLY_ANALYSIS_PRUNEUNPROFITABLE_H
#define POLLY_ANALYSIS_PRUNEUNPROFITABLE_H
namespace llvm {
class PassRegistry;
class Pass;
} // namespace llvm
namespace polly {
llvm::Pass *createPruneUnprofitablePass();
} // namespace polly
namespace llvm {
void initializePruneUnprofitablePass(llvm::PassRegistry &);
} // namespace llvm
#endif /* POLLY_ANALYSIS_PRUNEUNPROFITABLE_H */

View File

@ -2480,7 +2480,12 @@ public:
void realignParams();
/// Return true if this SCoP can be profitably optimized.
bool isProfitable() const;
///
/// @param ScalarsAreUnprofitable Never consider statements with scalar writes
/// as profitably optimizable.
///
/// @return Whether this SCoP can be profitably optimized.
bool isProfitable(bool ScalarsAreUnprofitable) const;
/// Return true if the SCoP contained at least one error block.
bool hasErrorBlock() const { return HasErrorBlock; }

View File

@ -0,0 +1,71 @@
//===- PruneUnprofitable.cpp ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Mark a SCoP as unfeasible if not deemed profitable to optimize.
//
//===----------------------------------------------------------------------===//
#include "polly/PruneUnprofitable.h"
#include "polly/ScopInfo.h"
#include "polly/ScopPass.h"
#define DEBUG_TYPE "polly-prune-unprofitable"
using namespace polly;
using namespace llvm;
namespace {
STATISTIC(ScopsProcessed,
"Number of SCoPs considered for unprofitability pruning");
STATISTIC(ScopsPruned, "Number of pruned SCoPs because it they cannot be "
"optimized in a significant way");
class PruneUnprofitable : public ScopPass {
private:
PruneUnprofitable(const PruneUnprofitable &) = delete;
const PruneUnprofitable &operator=(const PruneUnprofitable &) = delete;
public:
static char ID;
explicit PruneUnprofitable() : ScopPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ScopInfoRegionPass>();
AU.setPreservesAll();
}
virtual bool runOnScop(Scop &S) override {
if (PollyProcessUnprofitable) {
DEBUG(dbgs() << "NOTE: -polly-process-unprofitable active, won't prune "
"anything\n");
return false;
}
ScopsProcessed++;
if (!S.isProfitable(true)) {
DEBUG(dbgs() << "SCoP pruned because it probably cannot be optimized in "
"a significant way\n");
ScopsPruned++;
S.invalidate(PROFITABLE, DebugLoc());
}
return false;
}
};
char PruneUnprofitable::ID;
} // anonymous namespace
Pass *polly::createPruneUnprofitablePass() { return new PruneUnprofitable(); }
INITIALIZE_PASS_BEGIN(PruneUnprofitable, "polly-prune-unprofitable",
"Polly - Prune unprofitable SCoPs", false, false)
INITIALIZE_PASS_END(PruneUnprofitable, "polly-prune-unprofitable",
"Polly - Prune unprofitable SCoPs", false, false)

View File

@ -3387,7 +3387,7 @@ void Scop::init(AliasAnalysis &AA, DominatorTree &DT, LoopInfo &LI) {
// Check early for profitability. Afterwards it cannot change anymore,
// only the runtime context could become infeasible.
if (!isProfitable()) {
if (!isProfitable(UnprofitableScalarAccs)) {
invalidate(PROFITABLE, DebugLoc());
return;
}
@ -3901,7 +3901,7 @@ __isl_give isl_set *Scop::getAssumedContext() const {
return isl_set_copy(AssumedContext);
}
bool Scop::isProfitable() const {
bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
if (PollyProcessUnprofitable)
return true;
@ -3918,11 +3918,11 @@ bool Scop::isProfitable() const {
for (auto *MA : Stmt) {
if (MA->isRead())
continue;
ContainsArrayAccs |= MA->isArrayKind();
ContainsScalarAccs |= MA->isScalarKind();
ContainsArrayAccs |= MA->isLatestArrayKind();
ContainsScalarAccs |= MA->isLatestScalarKind();
}
if (!UnprofitableScalarAccs || (ContainsArrayAccs && !ContainsScalarAccs))
if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
OptimizableStmtsOrLoops += Stmt.getNumIterators();
}

View File

@ -35,6 +35,7 @@ add_polly_library(Polly
Analysis/ScopBuilder.cpp
Analysis/ScopGraphPrinter.cpp
Analysis/ScopPass.cpp
Analysis/PruneUnprofitable.cpp
CodeGen/BlockGenerators.cpp
${ISL_CODEGEN_FILES}
CodeGen/LoopGenerators.cpp

View File

@ -194,6 +194,11 @@ static cl::opt<bool>
cl::desc("Simplify SCoP after optimizations"),
cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool> EnablePruneUnprofitable(
"polly-enable-prune-unprofitable",
cl::desc("Bail out on unprofitable SCoPs before rescheduling"), cl::Hidden,
cl::init(false), cl::cat(PollyCategory));
namespace polly {
void initializePollyPasses(PassRegistry &Registry) {
initializeCodeGenerationPass(Registry);
@ -219,6 +224,7 @@ void initializePollyPasses(PassRegistry &Registry) {
initializeDeLICMPass(Registry);
initializeSimplifyPass(Registry);
initializeDumpModulePass(Registry);
initializePruneUnprofitablePass(Registry);
}
/// Register Polly passes such that they form a polyhedral optimizer.
@ -282,6 +288,9 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
if (DeadCodeElim)
PM.add(polly::createDeadCodeElimPass());
if (EnablePruneUnprofitable)
PM.add(polly::createPruneUnprofitablePass());
if (Target == TARGET_GPU) {
// GPU generation provides its own scheduling optimization strategy.
} else {

View File

@ -0,0 +1,56 @@
; RUN: opt %loadPolly -polly-process-unprofitable=false -polly-unprofitable-scalar-accs=false -polly-prune-unprofitable -disable-output -stats < %s 2>&1 | FileCheck -match-full-lines %s
; REQUIRES: asserts
;
; Skip this SCoP for having scalar dependencies between all statements,
; but only after ScopInfo (because optimization passes using ScopInfo such
; as DeLICM might remove these scalar dependencies).
;
; double x = 0;
; for (int i = 0; i < n; i += 1)
; for (int j = 0; j < m; j += 1) {
; B[0] = x;
; x = A[0];
; }
; return x;
;
define double @func(i32 %n, i32 %m, double* noalias nonnull %A, double* noalias nonnull %B) {
entry:
br label %outer.for
outer.for:
%outer.phi = phi double [0.0, %entry], [%inner.phi, %outer.inc]
%i = phi i32 [0, %entry], [%i.inc, %outer.inc]
%i.cmp = icmp slt i32 %i, %n
br i1 %i.cmp, label %inner.for, label %outer.exit
inner.for:
%inner.phi = phi double [%outer.phi, %outer.for], [%load, %inner.inc]
%j = phi i32 [0, %outer.for], [%j.inc, %inner.inc]
%j.cmp = icmp slt i32 %j, %m
br i1 %j.cmp, label %body, label %inner.exit
body:
store double %inner.phi, double* %B
%load = load double, double* %A
br label %inner.inc
inner.inc:
%j.inc = add nuw nsw i32 %j, 1
br label %inner.for
inner.exit:
br label %outer.inc
outer.inc:
%i.inc = add nuw nsw i32 %i, 1
br label %outer.for
outer.exit:
br label %return
return:
ret double %outer.phi
}
; CHECK: 1 polly-prune-unprofitable - Number of pruned SCoPs because it they cannot be optimized in a significant way