llvm-project/polly/lib/Analysis/ScopDetection.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1935 lines
67 KiB
C++
Raw Normal View History

//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free functions call
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. It is now tried to extend this Scop by
// creating a larger non canonical region.
//
//===----------------------------------------------------------------------===//
#include "polly/ScopDetection.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/ScopLocation.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace llvm;
using namespace polly;
#define DEBUG_TYPE "polly-detect"
// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
"polly-detect-profitability-min-per-loop-insts",
cl::desc("The minimal number of per-loop instructions before a single loop "
"region is considered profitable"),
cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));
bool polly::PollyProcessUnprofitable;
static cl::opt<bool, true> XPollyProcessUnprofitable(
"polly-process-unprofitable",
cl::desc(
"Process scops that are unlikely to benefit from Polly optimizations."),
cl::location(PollyProcessUnprofitable), cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
static cl::list<std::string> OnlyFunctions(
"polly-only-func",
cl::desc("Only run on functions that match a regex. "
"Multiple regexes can be comma separated. "
"Scop detection will run on all functions that match "
"ANY of the regexes provided."),
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
static cl::list<std::string> IgnoredFunctions(
"polly-ignore-func",
cl::desc("Ignore functions that match a regex. "
"Multiple regexes can be comma separated. "
"Scop detection will ignore all functions that match "
"ANY of the regexes provided."),
cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
bool polly::PollyAllowFullFunction;
static cl::opt<bool, true>
XAllowFullFunction("polly-detect-full-functions",
cl::desc("Allow the detection of full functions"),
cl::location(polly::PollyAllowFullFunction),
cl::init(false), cl::cat(PollyCategory));
static cl::opt<std::string> OnlyRegion(
"polly-only-region",
cl::desc("Only run on certain regions (The provided identifier must "
"appear in the name of the region's entry block"),
cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
cl::cat(PollyCategory));
static cl::opt<bool>
IgnoreAliasing("polly-ignore-aliasing",
cl::desc("Ignore possible aliasing of the array bases"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
bool polly::PollyAllowUnsignedOperations;
static cl::opt<bool, true> XPollyAllowUnsignedOperations(
"polly-allow-unsigned-operations",
cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::ZeroOrMore,
cl::init(true), cl::cat(PollyCategory));
bool polly::PollyUseRuntimeAliasChecks;
static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
"polly-use-runtime-alias-checks",
cl::desc("Use runtime alias checks to resolve possible aliasing."),
cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::ZeroOrMore,
cl::init(true), cl::cat(PollyCategory));
static cl::opt<bool>
ReportLevel("polly-report",
cl::desc("Print information about the activities of Polly"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<bool> AllowDifferentTypes(
"polly-allow-differing-element-types",
cl::desc("Allow different element types for array accesses"), cl::Hidden,
cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<bool>
AllowNonAffine("polly-allow-nonaffine",
cl::desc("Allow non affine access functions in arrays"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
static cl::opt<bool>
AllowModrefCall("polly-allow-modref-calls",
cl::desc("Allow functions with known modref behavior"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
static cl::opt<bool> AllowNonAffineSubRegions(
"polly-allow-nonaffine-branches",
cl::desc("Allow non affine conditions for branches"), cl::Hidden,
cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
static cl::opt<bool>
AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
cl::desc("Allow non affine conditions for loops"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
static cl::opt<bool, true>
TrackFailures("polly-detect-track-failures",
cl::desc("Track failure strings in detecting scop regions"),
cl::location(PollyTrackFailures), cl::Hidden, cl::ZeroOrMore,
cl::init(true), cl::cat(PollyCategory));
static cl::opt<bool> KeepGoing("polly-detect-keep-going",
cl::desc("Do not fail on the first error."),
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::cat(PollyCategory));
static cl::opt<bool, true>
PollyDelinearizeX("polly-delinearize",
cl::desc("Delinearize array access functions"),
cl::location(PollyDelinearize), cl::Hidden,
cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
Check scops a second time before working on them In rare cases the modification of one scop can effect the validity of other scops, as code generation of an earlier scop may make the scalar evolution functions derived for later scops less precise. The example that triggered this patch was a scop that contained an 'or' expression as follows: %add13710 = or i32 %j.19, 1 --> {(1 + (4 * %l)),+,2}<nsw><%for.body81> Scev could only analyze the 'or' as it knew %j.19 is a multiple of 2. This information was not available after the first scop was code generated (or independent-blocks was run on it) and SCEV could not derive a precise SCEV expression any more. This means we could not any more code generate this SCoP. My current understanding is that there is always the risk that an earlier code generation change invalidates later scops. As the example we have seen here is difficult to avoid, we use this occasion to guard us against all such invalidations. This patch "solves" this issue by verifying right before we start working on a detected scop, if this scop is in fact still valid. This adds a certain overhead. However the verification we run is anyways very fast and secondly it is only run on detected scops. So the overhead should not be very large. As a later optimization we could detect scops only on demand, such that we need to run scop-detections always only a single time. This should fix the single last failure in the LLVM test-suite for the new scev-based code generation. llvm-svn: 201593
2014-02-19 02:49:49 +08:00
static cl::opt<bool>
VerifyScops("polly-detect-verify",
cl::desc("Verify the detected SCoPs after each transformation"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
Check scops a second time before working on them In rare cases the modification of one scop can effect the validity of other scops, as code generation of an earlier scop may make the scalar evolution functions derived for later scops less precise. The example that triggered this patch was a scop that contained an 'or' expression as follows: %add13710 = or i32 %j.19, 1 --> {(1 + (4 * %l)),+,2}<nsw><%for.body81> Scev could only analyze the 'or' as it knew %j.19 is a multiple of 2. This information was not available after the first scop was code generated (or independent-blocks was run on it) and SCEV could not derive a precise SCEV expression any more. This means we could not any more code generate this SCoP. My current understanding is that there is always the risk that an earlier code generation change invalidates later scops. As the example we have seen here is difficult to avoid, we use this occasion to guard us against all such invalidations. This patch "solves" this issue by verifying right before we start working on a detected scop, if this scop is in fact still valid. This adds a certain overhead. However the verification we run is anyways very fast and secondly it is only run on detected scops. So the overhead should not be very large. As a later optimization we could detect scops only on demand, such that we need to run scop-detections always only a single time. This should fix the single last failure in the LLVM test-suite for the new scev-based code generation. llvm-svn: 201593
2014-02-19 02:49:49 +08:00
bool polly::PollyInvariantLoadHoisting;
static cl::opt<bool, true> XPollyInvariantLoadHoisting(
"polly-invariant-load-hoisting", cl::desc("Hoist invariant loads."),
cl::location(PollyInvariantLoadHoisting), cl::Hidden, cl::ZeroOrMore,
cl::init(false), cl::cat(PollyCategory));
/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
//===----------------------------------------------------------------------===//
// Statistics.
STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
"Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
"Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
"Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
"Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
"Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
"Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
"Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
"Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
"Number of scops with maximal loop depth 6 and larger "
"(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
"Maximal number of loops in scops (profitable scops only)");
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
bool OnlyProfitable);
namespace {
class DiagnosticScopFound : public DiagnosticInfo {
private:
static int PluginDiagnosticKind;
Function &F;
std::string FileName;
unsigned EntryLine, ExitLine;
public:
DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
unsigned ExitLine)
: DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
EntryLine(EntryLine), ExitLine(ExitLine) {}
void print(DiagnosticPrinter &DP) const override;
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == PluginDiagnosticKind;
}
};
} // namespace
int DiagnosticScopFound::PluginDiagnosticKind =
getNextAvailablePluginDiagnosticKind();
void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
DP << "Polly detected an optimizable loop region (scop) in function '" << F
<< "'\n";
if (FileName.empty()) {
DP << "Scop location is unknown. Compile with debug info "
"(-g) to get more precise information. ";
return;
}
DP << FileName << ":" << EntryLine << ": Start of scop\n";
DP << FileName << ":" << ExitLine << ": End of scop";
}
/// Check if a string matches any regex in a list of regexes.
/// @param Str the input string to match against.
/// @param RegexList a list of strings that are regular expressions.
static bool doesStringMatchAnyRegex(StringRef Str,
const cl::list<std::string> &RegexList) {
for (auto RegexStr : RegexList) {
Regex R(RegexStr);
std::string Err;
if (!R.isValid(Err))
report_fatal_error("invalid regex given as input to polly: " + Err, true);
if (R.match(Str))
return true;
}
return false;
}
//===----------------------------------------------------------------------===//
// ScopDetection.
ScopDetection::ScopDetection(Function &F, const DominatorTree &DT,
ScalarEvolution &SE, LoopInfo &LI, RegionInfo &RI,
AliasAnalysis &AA, OptimizationRemarkEmitter &ORE)
: DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {
if (!PollyProcessUnprofitable && LI.empty())
return;
Region *TopRegion = RI.getTopLevelRegion();
if (!OnlyFunctions.empty() &&
!doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
return;
if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
return;
if (!isValidFunction(F))
return;
findScops(*TopRegion);
NumScopRegions += ValidRegions.size();
// Prune non-profitable regions.
for (auto &DIt : DetectionContextMap) {
auto &DC = DIt.getSecond();
if (DC.Log.hasErrors())
continue;
if (!ValidRegions.count(&DC.CurRegion))
continue;
LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
if (isProfitableRegion(DC)) {
updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
continue;
}
ValidRegions.remove(&DC.CurRegion);
}
NumProfScopRegions += ValidRegions.size();
NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;
// Only makes sense when we tracked errors.
if (PollyTrackFailures)
emitMissedRemarks(F);
if (ReportLevel)
printLocations(F);
assert(ValidRegions.size() <= DetectionContextMap.size() &&
"Cached more results than valid regions");
}
template <class RR, typename... Args>
inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
Args &&... Arguments) const {
if (!Context.Verifying) {
RejectLog &Log = Context.Log;
std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
if (PollyTrackFailures)
Log.report(RejectReason);
LLVM_DEBUG(dbgs() << RejectReason->getMessage());
LLVM_DEBUG(dbgs() << "\n");
} else {
assert(!Assert && "Verification of detected scop failed");
}
return false;
}
Check scops a second time before working on them In rare cases the modification of one scop can effect the validity of other scops, as code generation of an earlier scop may make the scalar evolution functions derived for later scops less precise. The example that triggered this patch was a scop that contained an 'or' expression as follows: %add13710 = or i32 %j.19, 1 --> {(1 + (4 * %l)),+,2}<nsw><%for.body81> Scev could only analyze the 'or' as it knew %j.19 is a multiple of 2. This information was not available after the first scop was code generated (or independent-blocks was run on it) and SCEV could not derive a precise SCEV expression any more. This means we could not any more code generate this SCoP. My current understanding is that there is always the risk that an earlier code generation change invalidates later scops. As the example we have seen here is difficult to avoid, we use this occasion to guard us against all such invalidations. This patch "solves" this issue by verifying right before we start working on a detected scop, if this scop is in fact still valid. This adds a certain overhead. However the verification we run is anyways very fast and secondly it is only run on detected scops. So the overhead should not be very large. As a later optimization we could detect scops only on demand, such that we need to run scop-detections always only a single time. This should fix the single last failure in the LLVM test-suite for the new scev-based code generation. llvm-svn: 201593
2014-02-19 02:49:49 +08:00
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) const {
if (!ValidRegions.count(&R))
return false;
if (Verify) {
DetectionContextMap.erase(getBBPairForRegion(&R));
const auto &It = DetectionContextMap.insert(std::make_pair(
getBBPairForRegion(&R),
DetectionContext(const_cast<Region &>(R), AA, false /*verifying*/)));
DetectionContext &Context = It.first->second;
return isValidRegion(Context);
}
Check scops a second time before working on them In rare cases the modification of one scop can effect the validity of other scops, as code generation of an earlier scop may make the scalar evolution functions derived for later scops less precise. The example that triggered this patch was a scop that contained an 'or' expression as follows: %add13710 = or i32 %j.19, 1 --> {(1 + (4 * %l)),+,2}<nsw><%for.body81> Scev could only analyze the 'or' as it knew %j.19 is a multiple of 2. This information was not available after the first scop was code generated (or independent-blocks was run on it) and SCEV could not derive a precise SCEV expression any more. This means we could not any more code generate this SCoP. My current understanding is that there is always the risk that an earlier code generation change invalidates later scops. As the example we have seen here is difficult to avoid, we use this occasion to guard us against all such invalidations. This patch "solves" this issue by verifying right before we start working on a detected scop, if this scop is in fact still valid. This adds a certain overhead. However the verification we run is anyways very fast and secondly it is only run on detected scops. So the overhead should not be very large. As a later optimization we could detect scops only on demand, such that we need to run scop-detections always only a single time. This should fix the single last failure in the LLVM test-suite for the new scev-based code generation. llvm-svn: 201593
2014-02-19 02:49:49 +08:00
return true;
}
std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
// Get the first error we found. Even in keep-going mode, this is the first
// reason that caused the candidate to be rejected.
auto *Log = lookupRejectionLog(R);
// This can happen when we marked a region invalid, but didn't track
// an error for it.
if (!Log || !Log->hasErrors())
return "";
RejectReasonPtr RR = *Log->begin();
return RR->getMessage();
}
bool ScopDetection::addOverApproximatedRegion(Region *AR,
DetectionContext &Context) const {
// If we already know about Ar we can exit.
if (!Context.NonAffineSubRegionSet.insert(AR))
return true;
// All loops in the region have to be overapproximated too if there
// are accesses that depend on the iteration count.
for (BasicBlock *BB : AR->blocks()) {
Loop *L = LI.getLoopFor(BB);
if (AR->contains(L))
Context.BoxedLoopsSet.insert(L);
}
return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
bool ScopDetection::onlyValidRequiredInvariantLoads(
InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
return false;
for (LoadInst *Load : RequiredILS) {
// If we already know a load has been accepted as required invariant, we
// already run the validation below once and consequently don't need to
// run it again. Hence, we return early. For certain test cases (e.g.,
// COSMO this avoids us spending 50% of scop-detection time in this
// very function (and its children).
if (Context.RequiredILS.count(Load))
continue;
if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
return false;
for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
2019-10-21 23:48:42 +08:00
Load->getType(),
MaybeAlign(Load->getAlignment()), DL))
continue;
if (NonAffineRegion->contains(Load) &&
Load->getParent() != NonAffineRegion->getEntry())
return false;
}
}
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());
return true;
}
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
Loop *Scope) const {
SetVector<Value *> Values;
findValues(S0, SE, Values);
if (S1)
findValues(S1, SE, Values);
SmallPtrSet<Value *, 8> PtrVals;
for (auto *V : Values) {
if (auto *P2I = dyn_cast<PtrToIntInst>(V))
V = P2I->getOperand(0);
if (!V->getType()->isPointerTy())
continue;
auto *PtrSCEV = SE.getSCEVAtScope(V, Scope);
if (isa<SCEVConstant>(PtrSCEV))
continue;
auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
if (!BasePtr)
return true;
auto *BasePtrVal = BasePtr->getValue();
if (PtrVals.insert(BasePtrVal).second) {
for (auto *PtrVal : PtrVals)
if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
return true;
}
}
return false;
}
bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
DetectionContext &Context) const {
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
InvariantLoadsSetTy AccessILS;
if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS))
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
return false;
if (!onlyValidRequiredInvariantLoads(AccessILS, Context))
return false;
return true;
}
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
Value *Condition, bool IsLoopBranch,
DetectionContext &Context) const {
Loop *L = LI.getLoopFor(&BB);
const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);
if (IsLoopBranch && L->isLoopLatch(&BB))
return false;
// Check for invalid usage of different pointers in one expression.
if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
return false;
if (isAffine(ConditionSCEV, L, Context))
return true;
if (AllowNonAffineSubRegions &&
addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
return true;
return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
ConditionSCEV, ConditionSCEV, SI);
}
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
Value *Condition, bool IsLoopBranch,
DetectionContext &Context) const {
// Constant integer conditions are always affine.
if (isa<ConstantInt>(Condition))
return true;
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
auto Opcode = BinOp->getOpcode();
if (Opcode == Instruction::And || Opcode == Instruction::Or) {
Value *Op0 = BinOp->getOperand(0);
Value *Op1 = BinOp->getOperand(1);
return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
}
}
if (auto PHI = dyn_cast<PHINode>(Condition)) {
auto *Unique = dyn_cast_or_null<ConstantInt>(
getUniqueNonErrorValue(PHI, &Context.CurRegion, LI, DT));
if (Unique && (Unique->isZero() || Unique->isOne()))
return true;
}
if (auto Load = dyn_cast<LoadInst>(Condition))
if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
Context.RequiredILS.insert(Load);
return true;
}
// Non constant conditions of branches need to be ICmpInst.
if (!isa<ICmpInst>(Condition)) {
if (!IsLoopBranch && AllowNonAffineSubRegions &&
addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
return true;
return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
}
ICmpInst *ICmp = cast<ICmpInst>(Condition);
// Are both operands of the ICmp affine?
if (isa<UndefValue>(ICmp->getOperand(0)) ||
isa<UndefValue>(ICmp->getOperand(1)))
return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);
Loop *L = LI.getLoopFor(&BB);
const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);
LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, LI, DT);
RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, LI, DT);
// If unsigned operations are not allowed try to approximate the region.
if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
return !IsLoopBranch && AllowNonAffineSubRegions &&
addOverApproximatedRegion(RI.getRegionFor(&BB), Context);
// Check for invalid usage of different pointers in one expression.
if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
involvesMultiplePtrs(RHS, nullptr, L))
return false;
// Check for invalid usage of different pointers in a relational comparison.
if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
return false;
if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
return true;
if (!IsLoopBranch && AllowNonAffineSubRegions &&
addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
return true;
if (IsLoopBranch)
return false;
return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
ICmp);
}
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
bool AllowUnreachable,
DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
Instruction *TI = BB.getTerminator();
if (AllowUnreachable && isa<UnreachableInst>(TI))
return true;
// Return instructions are only valid if the region is the top level region.
if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
return true;
Value *Condition = getConditionFromTerminator(TI);
if (!Condition)
return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);
// UndefValue is not allowed as condition.
if (isa<UndefValue>(Condition))
return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);
if (BranchInst *BI = dyn_cast<BranchInst>(TI))
return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
SwitchInst *SI = dyn_cast<SwitchInst>(TI);
assert(SI && "Terminator was neither branch nor switch");
return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}
bool ScopDetection::isValidCallInst(CallInst &CI,
DetectionContext &Context) const {
if (CI.doesNotReturn())
return false;
if (CI.doesNotAccessMemory())
return true;
if (auto *II = dyn_cast<IntrinsicInst>(&CI))
if (isValidIntrinsicInst(*II, Context))
return true;
Function *CalledFunction = CI.getCalledFunction();
// Indirect calls are not supported.
if (CalledFunction == nullptr)
return false;
if (isDebugCall(&CI)) {
LLVM_DEBUG(dbgs() << "Allow call to debug function: "
<< CalledFunction->getName() << '\n');
return true;
}
if (AllowModrefCall) {
switch (AA.getModRefBehavior(CalledFunction)) {
case FMRB_UnknownModRefBehavior:
return false;
case FMRB_DoesNotAccessMemory:
case FMRB_OnlyReadsMemory:
// Implicitly disable delinearization since we have an unknown
// accesses with an unknown access function.
Context.HasUnknownAccess = true;
// Explicitly use addUnknown so we don't put a loop-variant
// pointer into the alias set.
Context.AST.addUnknown(&CI);
return true;
case FMRB_OnlyReadsArgumentPointees:
case FMRB_OnlyAccessesArgumentPointees:
for (const auto &Arg : CI.arg_operands()) {
if (!Arg->getType()->isPointerTy())
continue;
// Bail if a pointer argument has a base address not known to
// ScalarEvolution. Note that a zero pointer is acceptable.
auto *ArgSCEV = SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
if (ArgSCEV->isZero())
continue;
auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
if (!BP)
return false;
// Implicitly disable delinearization since we have an unknown
// accesses with an unknown access function.
Context.HasUnknownAccess = true;
}
// Explicitly use addUnknown so we don't put a loop-variant
// pointer into the alias set.
Context.AST.addUnknown(&CI);
return true;
case FMRB_DoesNotReadMemory:
case FMRB_OnlyAccessesInaccessibleMem:
case FMRB_OnlyAccessesInaccessibleOrArgMem:
return false;
}
}
return false;
}
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
DetectionContext &Context) const {
if (isIgnoredIntrinsic(&II))
return true;
// The closest loop surrounding the call instruction.
Loop *L = LI.getLoopFor(II.getParent());
// The access function and base pointer for memory intrinsics.
const SCEV *AF;
const SCEVUnknown *BP;
switch (II.getIntrinsicID()) {
// Memory intrinsics that can be represented are supported.
case Intrinsic::memmove:
case Intrinsic::memcpy:
AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
if (!AF->isZero()) {
BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
// Bail if the source pointer is not valid.
if (!isValidAccess(&II, AF, BP, Context))
return false;
}
LLVM_FALLTHROUGH;
case Intrinsic::memset:
AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
if (!AF->isZero()) {
BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
// Bail if the destination pointer is not valid.
if (!isValidAccess(&II, AF, BP, Context))
return false;
}
// Bail if the length is not affine.
if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
Context))
return false;
return true;
default:
break;
}
return false;
}
bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
DetectionContext &Ctx) const {
// A reference to function argument or constant value is invariant.
if (isa<Argument>(Val) || isa<Constant>(Val))
return true;
Instruction *I = dyn_cast<Instruction>(&Val);
if (!I)
return false;
if (!Reg.contains(I))
return true;
// Loads within the SCoP may read arbitrary values, need to hoist them. If it
// is not hoistable, it will be rejected later, but here we assume it is and
// that makes the value invariant.
if (auto LI = dyn_cast<LoadInst>(I)) {
Ctx.RequiredILS.insert(LI);
return true;
}
[ScopDetection] Only allow SCoP-wide available base pointers. Simplify ScopDetection::isInvariant(). Essentially deny everything that is defined within the SCoP and is not load-hoisted. The previous understanding of "invariant" has a few holes: - Expressions without side-effects with only invariant arguments, but are defined withing the SCoP's region with the exception of selects and PHIs. These should be part of the index expression derived by ScalarEvolution and not of the base pointer. - Function calls with that are !mayHaveSideEffects() (typically functions with "readnone nounwind" attributes). An example is given below. @C = external global i32 declare float* @getNextBasePtr(float*) readnone nounwind ... %ptr = call float* @getNextBasePtr(float* %A, float %B) The call might return: * %A, so %ptr aliases with it in the SCoP * %B, so %ptr aliases with it in the SCoP * @C, so %ptr aliases with it in the SCoP * a new pointer everytime it is called, such as malloc() * a pointer into the allocated block of one of the aforementioned * any of the above, at random at each call Hence and contrast to a comment in the base_pointer.ll regression test, %ptr is not necessarily the same all the time. It might also alias with anything and no AliasAnalysis can tell otherwise if the definition is external. It is hence not suitable in the role of a base pointer. The practical problem with base pointers defined in SCoP statements is that it is not available globally in the SCoP. The statement instance must be executed first before the base pointer can be used. This is no problem if the base pointer is transferred as a scalar value between statements. Uses of MemoryAccess::setNewAccessRelation may add a use of the base pointer anywhere in the array. setNewAccessRelation is used by JSONImporter, DeLICM and D28518. Indeed, BlockGenerator currently assumes that base pointers are available globally and generates invalid code for new access relation (referring to the base pointer of the original code) if not, even if the base pointer would be available in the statement. This could be fixed with some added complexity and restrictions. The ExprBuilder must lookup the local BBMap and code that call setNewAccessRelation must check whether the base pointer is available first. The code would still be incorrect in the presence of aliasing. There is the switch -polly-ignore-aliasing to explicitly allow this, but it is hardly a justification for the additional complexity. It would still be mostly useless because in most cases either getNextBasePtr() has external linkage in which case the readnone nounwind attributes cannot be derived in the translation unit itself, or is defined in the same translation unit and gets inlined. Reviewed By: grosser Differential Revision: https://reviews.llvm.org/D30695 llvm-svn: 297281
2017-03-08 23:14:46 +08:00
return false;
}
namespace {
/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
: SCEVRewriteVisitor(SE), Terms(Terms) {}
static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
std::vector<const SCEV *> *Terms = nullptr) {
SCEVRemoveMax Rewriter(SE, Terms);
return Rewriter.visit(Scev);
}
const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
auto Res = visit(Expr->getOperand(1));
if (Terms)
(*Terms).push_back(Res);
return Res;
}
return Expr;
}
private:
std::vector<const SCEV *> *Terms;
};
} // namespace
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
const SCEVUnknown *BasePointer) const {
SmallVector<const SCEV *, 4> Terms;
for (const auto &Pair : Context.Accesses[BasePointer]) {
std::vector<const SCEV *> MaxTerms;
SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
if (!MaxTerms.empty()) {
Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
continue;
}
// In case the outermost expression is a plain add, we check if any of its
// terms has the form 4 * %inst * %param * %param ..., aka a term that
// contains a product between a parameter and an instruction that is
// inside the scop. Such instructions, if allowed at all, are instructions
// SCEV can not represent, but Polly is still looking through. As a
// result, these instructions can depend on induction variables and are
// most likely no array sizes. However, terms that are multiplied with
// them are likely candidates for array sizes.
if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
for (auto Op : AF->operands()) {
if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
SE.collectParametricTerms(AF2, Terms);
if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
SmallVector<const SCEV *, 0> Operands;
for (auto *MulOp : AF2->operands()) {
if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
Operands.push_back(Const);
if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
if (!Context.CurRegion.contains(Inst))
Operands.push_back(MulOp);
} else {
Operands.push_back(MulOp);
}
}
}
if (Operands.size())
Terms.push_back(SE.getMulExpr(Operands));
}
}
}
if (Terms.empty())
SE.collectParametricTerms(Pair.second, Terms);
}
return Terms;
}
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
SmallVectorImpl<const SCEV *> &Sizes,
const SCEVUnknown *BasePointer,
Loop *Scope) const {
// If no sizes were found, all sizes are trivially valid. We allow this case
// to make it possible to pass known-affine accesses to the delinearization to
// try to recover some interesting multi-dimensional accesses, but to still
// allow the already known to be affine access in case the delinearization
// fails. In such situations, the delinearization will just return a Sizes
// array of size zero.
if (Sizes.size() == 0)
return true;
Value *BaseValue = BasePointer->getValue();
Region &CurRegion = Context.CurRegion;
for (const SCEV *DelinearizedSize : Sizes) {
// Don't pass down the scope to isAfffine; array dimensions must be
// invariant across the entire scop.
if (!isAffine(DelinearizedSize, nullptr, Context)) {
Sizes.clear();
break;
}
if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
auto *V = dyn_cast<Value>(Unknown->getValue());
if (auto *Load = dyn_cast<LoadInst>(V)) {
if (Context.CurRegion.contains(Load) &&
isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
Context.RequiredILS.insert(Load);
continue;
}
}
if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
Context.RequiredILS))
return invalid<ReportNonAffineAccess>(
Context, /*Assert=*/true, DelinearizedSize,
Context.Accesses[BasePointer].front().first, BaseValue);
}
// No array shape derived.
if (Sizes.empty()) {
if (AllowNonAffine)
return true;
for (const auto &Pair : Context.Accesses[BasePointer]) {
const Instruction *Insn = Pair.first;
const SCEV *AF = Pair.second;
if (!isAffine(AF, Scope, Context)) {
invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
BaseValue);
if (!KeepGoing)
return false;
}
}
return false;
}
return true;
}
// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
DetectionContext &Context, const SCEVUnknown *BasePointer,
std::shared_ptr<ArrayShape> Shape) const {
Value *BaseValue = BasePointer->getValue();
bool BasePtrHasNonAffine = false;
MapInsnToMemAcc TempMemoryAccesses;
for (const auto &Pair : Context.Accesses[BasePointer]) {
const Instruction *Insn = Pair.first;
auto *AF = Pair.second;
AF = SCEVRemoveMax::rewrite(AF, SE);
bool IsNonAffine = false;
TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
auto *Scope = LI.getLoopFor(Insn->getParent());
if (!AF) {
if (isAffine(Pair.second, Scope, Context))
Acc->DelinearizedSubscripts.push_back(Pair.second);
else
IsNonAffine = true;
} else {
if (Shape->DelinearizedSizes.size() == 0) {
Acc->DelinearizedSubscripts.push_back(AF);
} else {
SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts,
Shape->DelinearizedSizes);
if (Acc->DelinearizedSubscripts.size() == 0)
IsNonAffine = true;
}
for (const SCEV *S : Acc->DelinearizedSubscripts)
if (!isAffine(S, Scope, Context))
IsNonAffine = true;
}
// (Possibly) report non affine access
if (IsNonAffine) {
BasePtrHasNonAffine = true;
if (!AllowNonAffine)
invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
Insn, BaseValue);
if (!KeepGoing && !AllowNonAffine)
return false;
}
}
if (!BasePtrHasNonAffine)
Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
TempMemoryAccesses.end());
return true;
}
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
const SCEVUnknown *BasePointer,
Loop *Scope) const {
auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
auto Terms = getDelinearizationTerms(Context, BasePointer);
SE.findArrayDimensions(Terms, Shape->DelinearizedSizes,
Context.ElementSize[BasePointer]);
if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
Scope))
return false;
return computeAccessFunctions(Context, BasePointer, Shape);
}
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
// TODO: If we have an unknown access and other non-affine accesses we do
// not try to delinearize them for now.
if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
return AllowNonAffine;
for (auto &Pair : Context.NonAffineAccesses) {
auto *BasePointer = Pair.first;
auto *Scope = Pair.second;
if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
if (KeepGoing)
continue;
else
return false;
}
}
return true;
}
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
const SCEVUnknown *BP,
DetectionContext &Context) const {
if (!BP)
return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);
auto *BV = BP->getValue();
if (isa<UndefValue>(BV))
return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);
// FIXME: Think about allowing IntToPtrInst
if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);
// Check that the base address of the access is invariant in the current
// region.
if (!isInvariant(*BV, Context.CurRegion, Context))
return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);
AF = SE.getMinusSCEV(AF, BP);
const SCEV *Size;
if (!isa<MemIntrinsic>(Inst)) {
Size = SE.getElementSize(Inst);
} else {
auto *SizeTy =
SE.getEffectiveSCEVType(PointerType::getInt8PtrTy(SE.getContext()));
Size = SE.getConstant(SizeTy, 8);
}
if (Context.ElementSize[BP]) {
if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
Inst, BV);
Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
} else {
Context.ElementSize[BP] = Size;
}
bool IsVariantInNonAffineLoop = false;
SetVector<const Loop *> Loops;
findLoops(AF, Loops);
for (const Loop *L : Loops)
if (Context.BoxedLoopsSet.count(L))
IsVariantInNonAffineLoop = true;
auto *Scope = LI.getLoopFor(Inst->getParent());
bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
// Do not try to delinearize memory intrinsics and force them to be affine.
if (isa<MemIntrinsic>(Inst) && !IsAffine) {
return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
BV);
} else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
Context.Accesses[BP].push_back({Inst, AF});
if (!IsAffine || hasIVParams(AF))
Context.NonAffineAccesses.insert(
std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
} else if (!AllowNonAffine && !IsAffine) {
return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
BV);
}
if (IgnoreAliasing)
return true;
// Check if the base pointer of the memory access does alias with
// any other pointer. This cannot be handled at the moment.
AAMDNodes AATags;
Inst->getAAMetadata(AATags);
AliasSet &AS = Context.AST.getAliasSetFor(
MemoryLocation(BP->getValue(), MemoryLocation::UnknownSize, AATags));
if (!AS.isMustAlias()) {
if (PollyUseRuntimeAliasChecks) {
bool CanBuildRunTimeCheck = true;
// The run-time alias check places code that involves the base pointer at
// the beginning of the SCoP. This breaks if the base pointer is defined
// inside the scop. Hence, we can only create a run-time check if we are
// sure the base pointer is not an instruction defined inside the scop.
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
// However, we can ignore loads that will be hoisted.
InvariantLoadsSetTy VariantLS, InvariantLS;
// In order to detect loads which are dependent on other invariant loads
// as invariant, we use fixed-point iteration method here i.e we iterate
// over the alias set for arbitrary number of times until it is safe to
// assume that all the invariant loads have been detected
while (1) {
const unsigned int VariantSize = VariantLS.size(),
InvariantSize = InvariantLS.size();
for (const auto &Ptr : AS) {
Instruction *Inst = dyn_cast<Instruction>(Ptr.getValue());
if (Inst && Context.CurRegion.contains(Inst)) {
auto *Load = dyn_cast<LoadInst>(Inst);
if (Load && InvariantLS.count(Load))
continue;
if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
InvariantLS)) {
if (VariantLS.count(Load))
VariantLS.remove(Load);
Context.RequiredILS.insert(Load);
InvariantLS.insert(Load);
} else {
CanBuildRunTimeCheck = false;
VariantLS.insert(Load);
}
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
}
}
Allow invariant loads in the SCoP description This patch allows invariant loads to be used in the SCoP description, e.g., as loop bounds, conditions or in memory access functions. First we collect "required invariant loads" during SCoP detection that would otherwise make an expression we care about non-affine. To this end a new level of abstraction was introduced before SCEVValidator::isAffineExpr() namely ScopDetection::isAffine() and ScopDetection::onlyValidRequiredInvariantLoads(). Here we can decide if we want a load inside the region to be optimistically assumed invariant or not. If we do, it will be marked as required and in the SCoP generation we bail if it is actually not invariant. If we don't it will be a non-affine expression as before. At the moment we optimistically assume all "hoistable" (namely non-loop-carried) loads to be invariant. This causes us to expand some SCoPs and dismiss them later but it also allows us to detect a lot we would dismiss directly if we would ask e.g., AliasAnalysis::canBasicBlockModify(). We also allow potential aliases between optimistically assumed invariant loads and other pointers as our runtime alias checks are sound in case the loads are actually invariant. Together with the invariant checks this combination allows to handle a lot more than LICM can. The code generation of the invariant loads had to be extended as we can now have dependences between parameters and invariant (hoisted) loads as well as the other way around, e.g., test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll First, it is important to note that we cannot have real cycles but only dependences from a hoisted load to a parameter and from another parameter to that hoisted load (and so on). To handle such cases we materialize llvm::Values for parameters that are referred by a hoisted load on demand and then materialize the remaining parameters. Second, there are new kinds of dependences between hoisted loads caused by the constraints on their execution. If a hoisted load is conditionally executed it might depend on the value of another hoisted load. To deal with such situations we sort them already in the ScopInfo such that they can be generated in the order they are listed in the Scop::InvariantAccesses list (see compareInvariantAccesses). The dependences between hoisted loads caused by indirect accesses are handled the same way as before. llvm-svn: 249607
2015-10-08 04:17:36 +08:00
if (InvariantSize == InvariantLS.size() &&
VariantSize == VariantLS.size())
break;
}
if (CanBuildRunTimeCheck)
return true;
}
return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
}
return true;
}
bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
DetectionContext &Context) const {
Value *Ptr = Inst.getPointerOperand();
Loop *L = LI.getLoopFor(Inst->getParent());
const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
const SCEVUnknown *BasePointer;
BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}
bool ScopDetection::isValidInstruction(Instruction &Inst,
DetectionContext &Context) const {
for (auto &Op : Inst.operands()) {
auto *OpInst = dyn_cast<Instruction>(&Op);
if (!OpInst)
continue;
if (isErrorBlock(*OpInst->getParent(), Context.CurRegion, LI, DT)) {
auto *PHI = dyn_cast<PHINode>(OpInst);
if (PHI) {
for (User *U : PHI->users()) {
auto *UI = dyn_cast<Instruction>(U);
if (!UI || !UI->isTerminator())
return false;
}
} else {
return false;
}
}
}
if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
return false;
// We only check the call instruction but not invoke instruction.
if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
if (isValidCallInst(*CI, Context))
return true;
return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
}
if (!Inst.mayReadOrWriteMemory()) {
if (!isa<AllocaInst>(Inst))
return true;
return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
}
// Check the access function.
if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
Context.hasStores |= isa<StoreInst>(MemInst);
Context.hasLoads |= isa<LoadInst>(MemInst);
if (!MemInst.isSimple())
return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
&Inst);
return isValidMemoryAccess(MemInst, Context);
}
// We do not know this instruction, therefore we assume it is invalid.
return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}
/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
SmallVector<BasicBlock *, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
return !ExitingBlocks.empty();
}
bool ScopDetection::canUseISLTripCount(Loop *L,
DetectionContext &Context) const {
// Ensure the loop has valid exiting blocks as well as latches, otherwise we
// need to overapproximate it as a boxed loop.
SmallVector<BasicBlock *, 4> LoopControlBlocks;
L->getExitingBlocks(LoopControlBlocks);
L->getLoopLatches(LoopControlBlocks);
for (BasicBlock *ControlBB : LoopControlBlocks) {
if (!isValidCFG(*ControlBB, true, false, Context))
return false;
}
// We can use ISL to compute the trip count of L.
return true;
}
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const {
// Loops that contain part but not all of the blocks of a region cannot be
// handled by the schedule generation. Such loop constructs can happen
// because a region can contain BBs that have no path to the exit block
// (Infinite loops, UnreachableInst), but such blocks are never part of a
// loop.
//
// _______________
// | Loop Header | <-----------.
// --------------- |
// | |
// _______________ ______________
// | RegionEntry |-----> | RegionExit |----->
// --------------- --------------
// |
// _______________
// | EndlessLoop | <--.
// --------------- |
// | |
// \------------/
//
// In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
// neither entirely contained in the region RegionEntry->RegionExit
// (containing RegionEntry,EndlessLoop) nor is the region entirely contained
// in the loop.
// The block EndlessLoop is contained in the region because Region::contains
// tests whether it is not dominated by RegionExit. This is probably to not
// having to query the PostdominatorTree. Instead of an endless loop, a dead
// end can also be formed by an UnreachableInst. This case is already caught
// by isErrorBlock(). We hence only have to reject endless loops here.
if (!hasExitingBlocks(L))
return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);
[ScopDetect] Reject loop with multiple exit blocks. The current statement domain derivation algorithm does not (always) consider that different exit blocks of a loop can have different conditions to be reached. From the code for (int i = n; ; i-=2) { if (i <= 0) goto even; if (i <= 1) goto odd; A[i] = i; } even: A[0] = 42; return; odd: A[1] = 21; return; Polly currently derives the following domains: Stmt_even_critedge Domain := [n] -> { Stmt_even_critedge[] }; Stmt_odd Domain := [n] -> { Stmt_odd[] : (1 + n) mod 2 = 0 and n > 0 }; while the domain for the odd case is correct, Stmt_even is assumed to be executed unconditionally, which is obviously wrong. While projecting out the loop dimension in `adjustDomainDimensions`, it does not consider that there are other exit condition that have matched before. I don't know a how to fix this without changing a lot of code. Therefore This patch rejects loops with multiple exist blocks to fix the miscompile of test-suite's uuencode. The odd condition is transformed by LLVM to %cmp1 = icmp eq i64 %indvars.iv, 1 such that the project_out in adjustDomainDimensions() indeed only matches for odd n (using this condition only, we'd have an infinite loop otherwise). The even condition manifests as %cmp = icmp slt i64 %indvars.iv, 3 Because buildDomainsWithBranchConstraints() does not consider other exit conditions, it has to assume that the induction variable will eventually be lower than 3 and taking this exit. IMHO we need to reuse the algorithm that determines the number of iterations (addLoopBoundsToHeaderDomain) to determine which exit condition applies first. It has to happen in buildDomainsWithBranchConstraints() because the result will need to propagate to successor BBs. Currently addLoopBoundsToHeaderDomain() just look for union of all backedge conditions (which means leaving not the loop here). The patch in llvm.org/PR35465 changes it to look for exit conditions instead. This is required because there might be other exit conditions that do not alternatively go back to the loop header. Differential Revision: https://reviews.llvm.org/D45649 llvm-svn: 330858
2018-04-26 02:53:33 +08:00
// The algorithm for domain construction assumes that loops has only a single
// exit block (and hence corresponds to a subregion). Note that we cannot use
// L->getExitBlock() because it does not check whether all exiting edges point
// to the same BB.
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
BasicBlock *TheExitBlock = ExitBlocks[0];
for (BasicBlock *ExitBB : ExitBlocks) {
if (TheExitBlock != ExitBB)
return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
}
if (canUseISLTripCount(L, Context))
return true;
if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
Region *R = RI.getRegionFor(L->getHeader());
while (R != &Context.CurRegion && !R->contains(L))
R = R->getParent();
if (addOverApproximatedRegion(R, Context))
return true;
}
const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}
/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than @MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
unsigned MinProfitableTrips) {
auto *TripCount = SE.getBackedgeTakenCount(L);
int NumLoops = 1;
int MaxLoopDepth = 1;
if (MinProfitableTrips > 0)
if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount))
if (TripCountC->getType()->getScalarSizeInBits() <= 64)
if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
NumLoops -= 1;
for (auto &SubLoop : *L) {
LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
NumLoops += Stats.NumLoops;
MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1);
}
return {NumLoops, MaxLoopDepth};
}
ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
LoopInfo &LI, unsigned MinProfitableTrips) {
int LoopNum = 0;
int MaxLoopDepth = 0;
auto L = LI.getLoopFor(R->getEntry());
// If L is fully contained in R, move to first loop surrounding R. Otherwise,
// L is either nullptr or already surrounding R.
if (L && R->contains(L)) {
L = R->outermostLoopInRegion(L);
L = L->getParentLoop();
}
auto SubLoops =
L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
for (auto &SubLoop : SubLoops)
if (R->contains(SubLoop)) {
LoopStats Stats =
countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips);
LoopNum += Stats.NumLoops;
MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth);
}
return {LoopNum, MaxLoopDepth};
}
Region *ScopDetection::expandRegion(Region &R) {
// Initial no valid region was found (greater than R)
std::unique_ptr<Region> LastValidRegion;
auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
while (ExpandedRegion) {
const auto &It = DetectionContextMap.insert(std::make_pair(
getBBPairForRegion(ExpandedRegion.get()),
DetectionContext(*ExpandedRegion, AA, false /*verifying*/)));
DetectionContext &Context = It.first->second;
LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
// Only expand when we did not collect errors.
if (!Context.Log.hasErrors()) {
// If the exit is valid check all blocks
// - if true, a valid region was found => store it + keep expanding
// - if false, .tbd. => stop (should this really end the loop?)
if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
removeCachedResults(*ExpandedRegion);
DetectionContextMap.erase(It.first);
break;
}
// Store this region, because it is the greatest valid (encountered so
// far).
if (LastValidRegion) {
removeCachedResults(*LastValidRegion);
DetectionContextMap.erase(getBBPairForRegion(LastValidRegion.get()));
}
LastValidRegion = std::move(ExpandedRegion);
// Create and test the next greater region (if any)
ExpandedRegion =
std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
} else {
// Create and test the next greater region (if any)
removeCachedResults(*ExpandedRegion);
DetectionContextMap.erase(It.first);
ExpandedRegion =
std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
}
}
LLVM_DEBUG({
if (LastValidRegion)
dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
else
dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
});
return LastValidRegion.release();
}
static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
for (const BasicBlock *BB : R.blocks())
if (R.contains(LI.getLoopFor(BB)))
return false;
return true;
}
void ScopDetection::removeCachedResultsRecursively(const Region &R) {
for (auto &SubRegion : R) {
if (ValidRegions.count(SubRegion.get())) {
removeCachedResults(*SubRegion.get());
} else
removeCachedResultsRecursively(*SubRegion);
}
}
void ScopDetection::removeCachedResults(const Region &R) {
ValidRegions.remove(&R);
}
void ScopDetection::findScops(Region &R) {
const auto &It = DetectionContextMap.insert(std::make_pair(
getBBPairForRegion(&R), DetectionContext(R, AA, false /*verifying*/)));
DetectionContext &Context = It.first->second;
bool RegionIsValid = false;
if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R);
else
RegionIsValid = isValidRegion(Context);
bool HasErrors = !RegionIsValid || Context.Log.size() > 0;
if (HasErrors) {
removeCachedResults(R);
} else {
ValidRegions.insert(&R);
return;
}
for (auto &SubRegion : R)
findScops(*SubRegion);
// Try to expand regions.
//
// As the region tree normally only contains canonical regions, non canonical
// regions that form a Scop are not found. Therefore, those non canonical
// regions are checked by expanding the canonical ones.
std::vector<Region *> ToExpand;
for (auto &SubRegion : R)
ToExpand.push_back(SubRegion.get());
for (Region *CurrentRegion : ToExpand) {
// Skip invalid regions. Regions may become invalid, if they are element of
// an already expanded region.
if (!ValidRegions.count(CurrentRegion))
continue;
// Skip regions that had errors.
bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors();
if (HadErrors)
continue;
Region *ExpandedR = expandRegion(*CurrentRegion);
if (!ExpandedR)
continue;
R.addSubRegion(ExpandedR, true);
ValidRegions.insert(ExpandedR);
removeCachedResults(*CurrentRegion);
removeCachedResultsRecursively(*ExpandedR);
}
}
bool ScopDetection::allBlocksValid(DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
for (const BasicBlock *BB : CurRegion.blocks()) {
Loop *L = LI.getLoopFor(BB);
if (L && L->getHeader() == BB) {
if (CurRegion.contains(L)) {
if (!isValidLoop(L, Context) && !KeepGoing)
return false;
} else {
SmallVector<BasicBlock *, 1> Latches;
L->getLoopLatches(Latches);
for (BasicBlock *Latch : Latches)
if (CurRegion.contains(Latch))
return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
L);
}
}
}
for (BasicBlock *BB : CurRegion.blocks()) {
bool IsErrorBlock = isErrorBlock(*BB, CurRegion, LI, DT);
// Also check exception blocks (and possibly register them as non-affine
// regions). Even though exception blocks are not modeled, we use them
// to forward-propagate domain constraints during ScopInfo construction.
if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing)
return false;
if (IsErrorBlock)
continue;
for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
if (!isValidInstruction(*I, Context) && !KeepGoing)
return false;
}
if (!hasAffineMemoryAccesses(Context))
return false;
return true;
}
bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
int NumLoops) const {
int InstCount = 0;
if (NumLoops == 0)
return false;
for (auto *BB : Context.CurRegion.blocks())
if (Context.CurRegion.contains(LI.getLoopFor(BB)))
InstCount += BB->size();
InstCount = InstCount / NumLoops;
return InstCount >= ProfitabilityMinPerLoopInstructions;
}
bool ScopDetection::hasPossiblyDistributableLoop(
DetectionContext &Context) const {
for (auto *BB : Context.CurRegion.blocks()) {
auto *L = LI.getLoopFor(BB);
if (!Context.CurRegion.contains(L))
continue;
if (Context.BoxedLoopsSet.count(L))
continue;
unsigned StmtsWithStoresInLoops = 0;
for (auto *LBB : L->blocks()) {
bool MemStore = false;
for (auto &I : *LBB)
MemStore |= isa<StoreInst>(&I);
StmtsWithStoresInLoops += MemStore;
}
return (StmtsWithStoresInLoops > 1);
}
return false;
}
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
if (PollyProcessUnprofitable)
return true;
// We can probably not do a lot on scops that only write or only read
// data.
if (!Context.hasStores || !Context.hasLoads)
return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
int NumLoops =
countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops;
int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
// Scops with at least two loops may allow either loop fusion or tiling and
// are consequently interesting to look at.
if (NumAffineLoops >= 2)
return true;
// A loop with multiple non-trivial blocks might be amendable to distribution.
if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
return true;
// Scops that contain a loop with a non-trivial amount of computation per
// loop-iteration are interesting as we may be able to parallelize such
// loops. Individual loops that have only a small amount of computation
// per-iteration are performance-wise very fragile as any change to the
// loop induction variables may affect performance. To not cause spurious
// performance regressions, we do not consider such loops.
if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
return true;
return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion);
}
bool ScopDetection::isValidRegion(DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
return false;
}
DebugLoc DbgLoc;
if (CurRegion.getExit() &&
isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) {
LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
CurRegion.getExit(), DbgLoc);
}
if (!CurRegion.getEntry()->getName().count(OnlyRegion)) {
LLVM_DEBUG({
dbgs() << "Region entry does not match -polly-region-only";
dbgs() << "\n";
});
return false;
}
// SCoP cannot contain the entry block of the function, because we need
// to insert alloca instruction there when translate scalar to array.
if (!PollyAllowFullFunction &&
CurRegion.getEntry() ==
&(CurRegion.getEntry()->getParent()->getEntryBlock()))
return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry());
if (!allBlocksValid(Context))
return false;
if (!isReducibleRegion(CurRegion, DbgLoc))
return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
&CurRegion, DbgLoc);
LLVM_DEBUG(dbgs() << "OK\n");
return true;
}
void ScopDetection::markFunctionAsInvalid(Function *F) {
F->addFnAttr(PollySkipFnAttr);
}
bool ScopDetection::isValidFunction(Function &F) {
return !F.hasFnAttribute(PollySkipFnAttr);
}
void ScopDetection::printLocations(Function &F) {
for (const Region *R : *this) {
unsigned LineEntry, LineExit;
std::string FileName;
getDebugLocation(R, LineEntry, LineExit, FileName);
DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
F.getContext().diagnose(Diagnostic);
}
}
void ScopDetection::emitMissedRemarks(const Function &F) {
for (auto &DIt : DetectionContextMap) {
auto &DC = DIt.getSecond();
if (DC.Log.hasErrors())
emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE);
}
}
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
/// Enum for coloring BBs in Region.
///
/// WHITE - Unvisited BB in DFS walk.
/// GREY - BBs which are currently on the DFS stack for processing.
/// BLACK - Visited and completely processed BB.
enum Color { WHITE, GREY, BLACK };
BasicBlock *REntry = R.getEntry();
BasicBlock *RExit = R.getExit();
// Map to match the color of a BasicBlock during the DFS walk.
DenseMap<const BasicBlock *, Color> BBColorMap;
// Stack keeping track of current BB and index of next child to be processed.
std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
unsigned AdjacentBlockIndex = 0;
BasicBlock *CurrBB, *SuccBB;
CurrBB = REntry;
// Initialize the map for all BB with WHITE color.
for (auto *BB : R.blocks())
BBColorMap[BB] = WHITE;
// Process the entry block of the Region.
BBColorMap[CurrBB] = GREY;
DFSStack.push(std::make_pair(CurrBB, 0));
while (!DFSStack.empty()) {
// Get next BB on stack to be processed.
CurrBB = DFSStack.top().first;
AdjacentBlockIndex = DFSStack.top().second;
DFSStack.pop();
// Loop to iterate over the successors of current BB.
const Instruction *TInst = CurrBB->getTerminator();
unsigned NSucc = TInst->getNumSuccessors();
for (unsigned I = AdjacentBlockIndex; I < NSucc;
++I, ++AdjacentBlockIndex) {
SuccBB = TInst->getSuccessor(I);
// Checks for region exit block and self-loops in BB.
if (SuccBB == RExit || SuccBB == CurrBB)
continue;
// WHITE indicates an unvisited BB in DFS walk.
if (BBColorMap[SuccBB] == WHITE) {
// Push the current BB and the index of the next child to be visited.
DFSStack.push(std::make_pair(CurrBB, I + 1));
// Push the next BB to be processed.
DFSStack.push(std::make_pair(SuccBB, 0));
// First time the BB is being processed.
BBColorMap[SuccBB] = GREY;
break;
} else if (BBColorMap[SuccBB] == GREY) {
// GREY indicates a loop in the control flow.
// If the destination dominates the source, it is a natural loop
// else, an irreducible control flow in the region is detected.
if (!DT.dominates(SuccBB, CurrBB)) {
// Get debug info of instruction which causes irregular control flow.
DbgLoc = TInst->getDebugLoc();
return false;
}
}
}
// If all children of current BB have been processed,
// then mark that BB as fully processed.
if (AdjacentBlockIndex == NSucc)
BBColorMap[CurrBB] = BLACK;
}
return true;
}
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
bool OnlyProfitable) {
if (!OnlyProfitable) {
NumLoopsInScop += Stats.NumLoops;
MaxNumLoopsInScop =
std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);
if (Stats.MaxDepth == 0)
NumScopsDepthZero++;
else if (Stats.MaxDepth == 1)
NumScopsDepthOne++;
else if (Stats.MaxDepth == 2)
NumScopsDepthTwo++;
else if (Stats.MaxDepth == 3)
NumScopsDepthThree++;
else if (Stats.MaxDepth == 4)
NumScopsDepthFour++;
else if (Stats.MaxDepth == 5)
NumScopsDepthFive++;
else
NumScopsDepthLarger++;
} else {
NumLoopsInProfScop += Stats.NumLoops;
MaxNumLoopsInProfScop =
std::max(MaxNumLoopsInProfScop.getValue(), (unsigned)Stats.NumLoops);
if (Stats.MaxDepth == 0)
NumProfScopsDepthZero++;
else if (Stats.MaxDepth == 1)
NumProfScopsDepthOne++;
else if (Stats.MaxDepth == 2)
NumProfScopsDepthTwo++;
else if (Stats.MaxDepth == 3)
NumProfScopsDepthThree++;
else if (Stats.MaxDepth == 4)
NumProfScopsDepthFour++;
else if (Stats.MaxDepth == 5)
NumProfScopsDepthFive++;
else
NumProfScopsDepthLarger++;
}
}
ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
if (DCMIt == DetectionContextMap.end())
return nullptr;
return &DCMIt->second;
}
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
const DetectionContext *DC = getDetectionContext(R);
return DC ? &DC->Log : nullptr;
}
void ScopDetection::verifyRegion(const Region &R) const {
assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
isValidRegion(Context);
}
void ScopDetection::verifyAnalysis() const {
Check scops a second time before working on them In rare cases the modification of one scop can effect the validity of other scops, as code generation of an earlier scop may make the scalar evolution functions derived for later scops less precise. The example that triggered this patch was a scop that contained an 'or' expression as follows: %add13710 = or i32 %j.19, 1 --> {(1 + (4 * %l)),+,2}<nsw><%for.body81> Scev could only analyze the 'or' as it knew %j.19 is a multiple of 2. This information was not available after the first scop was code generated (or independent-blocks was run on it) and SCEV could not derive a precise SCEV expression any more. This means we could not any more code generate this SCoP. My current understanding is that there is always the risk that an earlier code generation change invalidates later scops. As the example we have seen here is difficult to avoid, we use this occasion to guard us against all such invalidations. This patch "solves" this issue by verifying right before we start working on a detected scop, if this scop is in fact still valid. This adds a certain overhead. However the verification we run is anyways very fast and secondly it is only run on detected scops. So the overhead should not be very large. As a later optimization we could detect scops only on demand, such that we need to run scop-detections always only a single time. This should fix the single last failure in the LLVM test-suite for the new scev-based code generation. llvm-svn: 201593
2014-02-19 02:49:49 +08:00
if (!VerifyScops)
return;
for (const Region *R : ValidRegions)
verifyRegion(*R);
}
bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
Result.reset(new ScopDetection(F, DT, SE, LI, RI, AA, ORE));
return false;
}
void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
// We also need AA and RegionInfo when we are verifying analysis.
AU.addRequiredTransitive<AAResultsWrapperPass>();
AU.addRequiredTransitive<RegionInfoPass>();
AU.setPreservesAll();
}
void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
for (const Region *R : Result->ValidRegions)
OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
OS << "\n";
}
ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
// Disable runtime alias checks if we ignore aliasing all together.
if (IgnoreAliasing)
PollyUseRuntimeAliasChecks = false;
}
ScopAnalysis::ScopAnalysis() {
// Disable runtime alias checks if we ignore aliasing all together.
if (IgnoreAliasing)
PollyUseRuntimeAliasChecks = false;
}
void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
char ScopDetectionWrapperPass::ID;
AnalysisKey ScopAnalysis::Key;
ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
auto &LI = FAM.getResult<LoopAnalysis>(F);
auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
auto &AA = FAM.getResult<AAManager>(F);
auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
return {F, DT, SE, LI, RI, AA, ORE};
}
PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
FunctionAnalysisManager &FAM) {
OS << "Detected Scops in Function " << F.getName() << "\n";
auto &SD = FAM.getResult<ScopAnalysis>(F);
for (const Region *R : SD.ValidRegions)
OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
OS << "\n";
return PreservedAnalyses::all();
}
Pass *polly::createScopDetectionWrapperPassPass() {
return new ScopDetectionWrapperPass();
}
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
"Polly - Detect static control parts (SCoPs)", false,
false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
"Polly - Detect static control parts (SCoPs)", false, false)