llvm-project/polly/lib/Analysis/TempScopInfo.cpp

514 lines
17 KiB
C++
Raw Normal View History

//===---------- TempScopInfo.cpp - Extract TempScops ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Collect information about the control flow regions detected by the Scop
// detection, such that this information can be translated into its polyhedral
// representation.
//
//===----------------------------------------------------------------------===//
#include "polly/TempScopInfo.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/LinkAllPasses.h"
#include "polly/ScopDetection.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
2013-05-07 16:11:54 +08:00
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
2013-05-07 16:11:54 +08:00
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
2013-05-07 16:11:54 +08:00
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
using namespace polly;
#define DEBUG_TYPE "polly-analyze-ir"
//===----------------------------------------------------------------------===//
/// Helper Classes
/// Print this access to \p OS as "<kind> <base name>[<offset>]" followed by a
/// newline, where <kind> is "Read", "Write" or "MayWrite".
void IRAccess::print(raw_ostream &OS) const {
  const bool Reads = isRead();

  // Only non-read accesses are asked whether they are "may" writes; the "May"
  // prefix is emitted directly in front of "Write ".
  if (!Reads && isMayWrite())
    OS << "May";
  OS << (Reads ? "Read " : "Write ");

  OS << BaseAddress->getName() << '[' << *Offset << "]\n";
}
/// Print this comparison to \p OS.
///
/// Currently a stub: the body contains no statements, so nothing is written
/// to the stream.
void Comparison::print(raw_ostream &OS) const {
// Not yet implemented.
}
/// Print all comparisons of the condition \p Cond to \p OS, joined by " && ".
///
/// \p Cond is expected to be non-empty (checked by an assertion).
static void printBBCond(raw_ostream &OS, const BBCond &Cond) {
  assert(!Cond.empty() && "Unexpected empty condition!");

  bool IsFirst = true;
  for (const auto &Cmp : Cond) {
    // The separator goes between comparisons, never in front of the first.
    if (!IsFirst)
      OS << " && ";
    Cmp.print(OS);
    IsFirst = false;
  }
}
/// Stream-insertion operator for block conditions.
///
/// Forwards to printBBCond() and returns \p OS so that insertions can be
/// chained.
inline raw_ostream &operator<<(raw_ostream &OS, const BBCond &Cond) {
  printBBCond(OS, Cond);

  return OS;
}
//===----------------------------------------------------------------------===//
// TempScop implementation
2014-02-12 09:55:28 +08:00
/// Out-of-line destructor; it performs no work of its own.
TempScop::~TempScop() = default;
/// Print a header line "Scop: <region name>" followed by the per-block details
/// of the whole region (see printDetail()), starting at indentation 0.
void TempScop::print(raw_ostream &OS, ScalarEvolution *SE, LoopInfo *LI) const {
  OS << "Scop: ";
  OS << R.getNameStr();
  OS << "\n";

  const unsigned TopLevelIndent = 0;
  printDetail(OS, SE, LI, &R, TopLevelIndent);
}
/// Print the recorded accesses of every block in \p CurR.
///
/// For each block that has an entry in AccFuncMap, a line "BB: <name>" is
/// printed at indentation \p ind, followed by one line per access at
/// indentation \p ind + 2. Blocks without an entry are skipped.
/// \p SE and \p LI are not used by this function.
void TempScop::printDetail(raw_ostream &OS, ScalarEvolution *SE, LoopInfo *LI,
                           const Region *CurR, unsigned ind) const {
  // FIXME: Print other details rather than memory accesses.
  for (const auto &Block : CurR->blocks()) {
    const auto Entry = AccFuncMap.find(Block);

    // Only blocks with recorded accesses produce output.
    if (Entry != AccFuncMap.end()) {
      OS.indent(ind) << "BB: " << Block->getName() << '\n';

      for (const auto &Access : Entry->second)
        Access.first.print(OS.indent(ind + 2));
    }
  }
}
/// Create the scalar accesses that model \p PHI inside the region \p R.
///
/// PHI nodes are modeled as if they had been demoted prior to the SCoP
/// detection: each incoming block writes the PHI's location at its end and the
/// PHI itself is a read of that location. Writes are appended to AccFuncMap
/// for the incoming blocks; the read is appended to \p Functions. Nothing is
/// built when canSynthesize() accepts the PHI. Incoming blocks outside of \p R
/// or inside \p NonAffineSubRegion (if given) are skipped.
void TempScopInfo::buildPHIAccesses(PHINode *PHI, Region &R,
                                    AccFuncSetType &Functions,
                                    Region *NonAffineSubRegion) {
  if (canSynthesize(PHI, LI, SE, &R))
    return;

  bool HasIncomingWrite = false;

  for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
    Value *Incoming = PHI->getIncomingValue(Idx);
    BasicBlock *IncomingBB = PHI->getIncomingBlock(Idx);

    // Skip edges from outside the region, and do not build scalar dependences
    // inside a non-affine subregion.
    if (!R.contains(IncomingBB) ||
        (NonAffineSubRegion && NonAffineSubRegion->contains(IncomingBB)))
      continue;

    Instruction *AccessInst = dyn_cast<Instruction>(Incoming);
    if (!AccessInst) {
      // The operand is a constant, global or argument. An access instruction
      // is still needed, so the PHI itself is chosen.
      AccessInst = PHI;
    } else if (AccessInst->getParent() != IncomingBB) {
      // We pretend there is a use (more precisely a write) of the operand in
      // the incoming block. If it is defined in a different block, add a
      // scalar dependence from its definition to the incoming block.
      BasicBlock *DefBB = AccessInst->getParent();

      IRAccess ScalarRead(IRAccess::READ, AccessInst, ZeroOffset, 1, true);
      AccFuncMap[IncomingBB].push_back(std::make_pair(ScalarRead, PHI));

      IRAccess ScalarWrite(IRAccess::MUST_WRITE, AccessInst, ZeroOffset, 1,
                           true);
      AccFuncMap[DefBB].push_back(std::make_pair(ScalarWrite, AccessInst));
    }

    HasIncomingWrite = true;

    IRAccess PHIWrite(IRAccess::MUST_WRITE, PHI, ZeroOffset, 1, true);
    AccFuncMap[IncomingBB].push_back(std::make_pair(PHIWrite, AccessInst));
  }

  // The read of the PHI location is only modeled if something wrote to it.
  if (HasIncomingWrite) {
    IRAccess PHIRead(IRAccess::READ, PHI, ZeroOffset, 1, true);
    Functions.push_back(std::make_pair(PHIRead, PHI));
  }
}
/// Build scalar read accesses for the users of \p Inst that live in another
/// basic block (statement) of the region \p R.
///
/// Reads are appended to AccFuncMap for the user's block. Users inside
/// \p NonAffineSubRegion (if given) are ignored.
///
/// \returns true if \p Inst has a user outside of \p R, or a user in another
///          block of \p R for which a read access was created; false
///          otherwise, and always false for ignored intrinsics.
bool TempScopInfo::buildScalarDependences(Instruction *Inst, Region *R,
                                          Region *NonAffineSubRegion) {
  const bool DefIsSynthesizable = canSynthesize(Inst, LI, SE, R);
  if (isIgnoredIntrinsic(Inst))
    return false;

  bool UsedAcrossStmts = false;
  BasicBlock *DefBB = Inst->getParent();

  for (User *U : Inst->users()) {
    // Users that are not instructions are ignored.
    Instruction *UserInst = dyn_cast<Instruction>(U);
    if (!UserInst)
      continue;

    BasicBlock *UserBB = UserInst->getParent();

    // Users in the same basic block (statement) need no dependence.
    if (UserBB == DefBB)
      continue;

    // Do not build scalar dependences inside a non-affine subregion.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(UserBB))
      continue;

    // A use outside of the SCoP counts as a cross-statement use, but no read
    // access is built for it.
    if (!R->contains(UserBB)) {
      UsedAcrossStmts = true;
      continue;
    }

    // No scalar dependence is needed if
    //  - the definition can be synthesized (the user is in the region), or
    //  - the user can be synthesized, as synthesizable scalars can be
    //    generated by the code generator, or
    //  - the user is a PHI node in the region, as PHI nodes handle their
    //    operands on their own.
    if (DefIsSynthesizable || canSynthesize(UserInst, LI, SE, R) ||
        isa<PHINode>(UserInst))
      continue;

    // Inst is used in another statement of the SCoP.
    UsedAcrossStmts = true;

    // Use the def instruction as base address of the IRAccess, so that it will
    // become the name of the scalar access in the polyhedral form.
    IRAccess ScalarRead(IRAccess::READ, Inst, ZeroOffset, 1, true);
    AccFuncMap[UserBB].push_back(std::make_pair(ScalarRead, UserInst));
  }

  return UsedAcrossStmts;
}
// Map from instructions to their MemAcc record; defined outside of this file.
extern MapInsnToMemAcc InsnToMemAcc;

/// Build the IRAccess describing the memory access performed by \p Inst.
///
/// \param Inst       A load or store instruction; anything that is not a load
///                   is cast to a store.
/// \param L          The loop scope at which the pointer's SCEV is evaluated.
/// \param R          The region against which affinity of the access function
///                   is checked.
/// \param BoxedLoops Optional set of loops contained in non-affine subregions;
///                   an access function that refers to one of them is treated
///                   as non-affine.
///
/// \returns The access with the base pointer subtracted from the access
///          function. Loads are READ, stores are MUST_WRITE, and non-affine
///          stores are downgraded to MAY_WRITE. If PollyDelinearize is set and
///          a MemAcc record exists for \p Inst, the recorded delinearized
///          subscripts and sizes are used and the access is marked affine.
IRAccess
TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R,
                            const ScopDetection::BoxedLoopsSetTy *BoxedLoops) {
  Type *AccessedTy;
  IRAccess::TypeKind AccessKind;

  if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    AccessedTy = Load->getType();
    AccessKind = IRAccess::READ;
  } else {
    StoreInst *Store = cast<StoreInst>(Inst);
    AccessedTy = Store->getValueOperand()->getType();
    AccessKind = IRAccess::MUST_WRITE;
  }
  const unsigned Size = TD->getTypeStoreSize(AccessedTy);

  const SCEV *AccessFunction = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction));

  assert(BasePointer && "Could not find base pointer");
  AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer);

  // Look the instruction up with find() instead of operator[]: a pure query
  // must not insert an (empty) entry for every instruction that has no
  // record in the shared map.
  if (PollyDelinearize) {
    auto AccIt = InsnToMemAcc.find(Inst);
    MemAcc *Acc = AccIt == InsnToMemAcc.end() ? nullptr : AccIt->second;
    if (Acc)
      return IRAccess(AccessKind, BasePointer->getValue(), AccessFunction, Size,
                      true, Acc->DelinearizedSubscripts,
                      Acc->Shape->DelinearizedSizes);
  }

  // Check if the access depends on a loop contained in a non-affine subregion.
  bool isVariantInNonAffineLoop = false;
  if (BoxedLoops) {
    SetVector<const Loop *> Loops;
    findLoops(AccessFunction, Loops);
    for (const Loop *AccessLoop : Loops)
      if (BoxedLoops->count(AccessLoop)) {
        // One boxed loop is enough; no need to look at the remaining ones.
        isVariantInNonAffineLoop = true;
        break;
      }
  }

  bool IsAffine = !isVariantInNonAffineLoop &&
                  isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue());

  // Without delinearization the access is described by a single subscript
  // (the whole access function) and a single size (the store size).
  SmallVector<const SCEV *, 4> Subscripts, Sizes;
  Subscripts.push_back(AccessFunction);
  Sizes.push_back(SE->getConstant(ZeroOffset->getType(), Size));

  // A write through a non-affine access function is only a "may" write.
  if (!IsAffine && AccessKind == IRAccess::MUST_WRITE)
    AccessKind = IRAccess::MAY_WRITE;

  return IRAccess(AccessKind, BasePointer->getValue(), AccessFunction, Size,
                  IsAffine, Subscripts, Sizes);
}
/// Build the access functions for the subregion \p SR of the region \p R.
///
/// If \p SR is a non-affine subregion of \p R, all of its blocks are processed
/// directly with \p SR passed as the non-affine subregion. Otherwise the
/// elements of \p SR are visited: subregions recursively, basic blocks
/// individually.
void TempScopInfo::buildAccessFunctions(Region &R, Region &SR) {
  if (!SD->isNonAffineSubRegion(&SR, &R)) {
    for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I) {
      if (I->isSubRegion())
        buildAccessFunctions(R, *I->getNodeAs<Region>());
      else
        buildAccessFunctions(R, *I->getNodeAs<BasicBlock>());
    }
    return;
  }

  for (BasicBlock *Block : SR.blocks())
    buildAccessFunctions(R, *Block, &SR);
}
/// Build the access functions for all instructions of \p BB except its last
/// one (the loop stops in front of the final instruction of the block).
///
/// Loads and stores get an access built by buildIRAccess(), PHI nodes are
/// handled by buildPHIAccesses(), and every non-store instruction for which
/// buildScalarDependences() reports a cross-statement use gets a scalar
/// MUST_WRITE access. The accesses collected for \p BB are appended to
/// AccFuncMap[&BB] once the whole block has been visited.
void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB,
                                        Region *NonAffineSubRegion) {
  AccFuncSetType Collected;
  Loop *SurroundingLoop = LI->getLoopFor(&BB);

  // The set of loops contained in non-affine subregions that are part of R.
  const ScopDetection::BoxedLoopsSetTy *BoxedLoops = SD->getBoxedLoops(&R);

  for (BasicBlock::iterator It = BB.begin(), Last = --BB.end(); It != Last;
       ++It) {
    Instruction *Inst = &*It;

    if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) {
      IRAccess MemoryAccess =
          buildIRAccess(Inst, SurroundingLoop, &R, BoxedLoops);
      Collected.push_back(std::make_pair(MemoryAccess, Inst));
    }

    if (PHINode *PHI = dyn_cast<PHINode>(Inst))
      buildPHIAccesses(PHI, R, Collected, NonAffineSubRegion);

    if (isa<StoreInst>(Inst))
      continue;

    // If the instruction is used outside the statement, we need to build the
    // write access.
    if (buildScalarDependences(Inst, &R, NonAffineSubRegion)) {
      IRAccess ScalarWrite(IRAccess::MUST_WRITE, Inst, ZeroOffset, 1, true);
      Collected.push_back(std::make_pair(ScalarWrite, Inst));
    }
  }

  // Do not create a map entry for blocks without any collected access.
  if (!Collected.empty()) {
    AccFuncSetType &Accs = AccFuncMap[&BB];
    Accs.insert(Accs.end(), Collected.begin(), Collected.end());
  }
}
/// Translate the branch condition \p V into a newly allocated Comparison.
///
/// \param V        The condition; either a ConstantInt or an ICmpInst.
/// \param inverted Whether the condition has to be negated.
/// \param Comp     Out-parameter receiving the Comparison created with `new`.
void TempScopInfo::buildAffineCondition(Value &V, bool inverted,
                                        Comparison **Comp) const {
  if (ConstantInt *C = dyn_cast<ConstantInt>(&V)) {
    // Constant conditions compare the constants 0 and 1 of the condition's
    // type: `0 <= 1` if C->isOne() equals `inverted`, `0 >= 1` otherwise.
    // NOTE(review): if C is an i1 and the constants are later interpreted as
    // signed, "1" reads as -1, which flips the meaning of both comparisons --
    // confirm against the consumer of Comparison before changing this.
    const SCEV *Zero = SE->getConstant(C->getType(), 0);
    const SCEV *One = SE->getConstant(C->getType(), 1);

    const ICmpInst::Predicate ConstPred =
        (C->isOne() == inverted) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_SGE;
    *Comp = new Comparison(Zero, One, ConstPred);
    return;
  }

  ICmpInst *ICmp = dyn_cast<ICmpInst>(&V);
  assert(ICmp && "Only ICmpInst of constant as condition supported!");

  // Both operands are evaluated in the scope of the loop containing the
  // comparison.
  Loop *Scope = LI->getLoopFor(ICmp->getParent());
  const SCEV *LHS = SE->getSCEVAtScope(ICmp->getOperand(0), Scope);
  const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), Scope);

  // Invert the predicate if needed.
  const ICmpInst::Predicate Pred =
      inverted ? ICmpInst::getInversePredicate(ICmp->getPredicate())
               : ICmp->getPredicate();

  // TODO: At the moment we need to see everything as signed. This is a
  // correctness issue that needs to be solved: the unsigned predicates
  // (UGT, UGE, ULT, ULE) get no special treatment here.
  *Comp = new Comparison(LHS, RHS, Pred);
}
/// Collect the branch conditions that guard the execution of \p BB inside the
/// region \p R and store them in BBConds[BB].
///
/// No entry is created in BBConds if no condition was collected.
void TempScopInfo::buildCondition(BasicBlock *BB, Region &R) {
  BasicBlock *RegionEntry = R.getEntry();
  BBCond Cond;

  DomTreeNode *BBNode = DT->getNode(BB), *EntryNode = DT->getNode(RegionEntry);

  assert(BBNode && EntryNode && "Get null node while building condition!");

  // Walk up the dominance tree until reaching the entry node. Collect all
  // branching blocks on the path to BB except if BB postdominates the block
  // containing the condition.
  SmallVector<BasicBlock *, 4> DominatorBrBlocks;
  while (BBNode != EntryNode) {
    BasicBlock *CurBB = BBNode->getBlock();
    BBNode = BBNode->getIDom();
    assert(BBNode && "BBNode should not reach the root node!");

    if (PDT->dominates(CurBB, BBNode->getBlock()))
      continue;

    BranchInst *Br = dyn_cast<BranchInst>(BBNode->getBlock()->getTerminator());
    assert(Br && "A Valid Scop should only contain branch instruction");

    if (Br->isUnconditional())
      continue;

    DominatorBrBlocks.push_back(BBNode->getBlock());
  }

  RegionInfo *RI = R.getRegionInfo();

  // Iterate in reverse order over the dominating blocks. Until a non-affine
  // branch was encountered add all conditions collected. If a non-affine branch
  // was encountered, stop as we overapproximate from here on anyway.
  for (auto BIt = DominatorBrBlocks.rbegin(), BEnd = DominatorBrBlocks.rend();
       BIt != BEnd; ++BIt) {
    // Named differently from the dominator tree node above to avoid shadowing.
    BasicBlock *BranchBB = *BIt;
    BranchInst *Br = dyn_cast<BranchInst>(BranchBB->getTerminator());
    assert(Br && "A Valid Scop should only contain branch instruction");
    assert(Br->isConditional() && "Assumed a conditional branch");

    if (SD->isNonAffineSubRegion(RI->getRegionFor(BranchBB), &R))
      break;

    BasicBlock *TrueBB = Br->getSuccessor(0), *FalseBB = Br->getSuccessor(1);

    // Is BB on the ELSE side of the branch?
    bool inverted = DT->dominates(FalseBB, BB);

    // If both TrueBB and FalseBB dominate BB, one of them must be the target of
    // a back-edge, i.e. a loop header.
    if (inverted && DT->dominates(TrueBB, BB)) {
      assert(
          (DT->dominates(TrueBB, FalseBB) || DT->dominates(FalseBB, TrueBB)) &&
          "One of the successors should be the loop header and dominate the "
          "other!");

      // It is not an invert if the FalseBB is the header.
      if (DT->dominates(FalseBB, TrueBB))
        inverted = false;
    }

    Comparison *Cmp = nullptr;
    buildAffineCondition(*(Br->getCondition()), inverted, &Cmp);

    // push_back() stores a copy of the comparison, so the heap object handed
    // out by buildAffineCondition() has to be released here; otherwise one
    // Comparison would leak per collected branch.
    Cond.push_back(*Cmp);
    delete Cmp;
  }

  if (!Cond.empty())
    BBConds[BB] = Cond;
}
/// Create the TempScop for the region \p R.
///
/// The TempScop is constructed from \p R together with the BBConds and
/// AccFuncMap members; afterwards the access functions of the region and the
/// condition of each of its blocks are built.
///
/// \returns The TempScop allocated with `new`.
TempScop *TempScopInfo::buildTempScop(Region &R) {
  TempScop *Result = new TempScop(R, BBConds, AccFuncMap);

  buildAccessFunctions(R, R);

  for (BasicBlock *Block : R.blocks())
    buildCondition(Block, R);

  return Result;
}
/// Look up the TempScop that was built for the region \p R.
///
/// \returns The stored TempScop, or a null pointer if none is recorded.
TempScop *TempScopInfo::getTempScop(const Region *R) const {
  const auto Found = TempScops.find(R);
  if (Found == TempScops.end())
    return nullptr;

  return Found->second;
}
/// Print every TempScop recorded by this pass to \p OS. The module argument
/// is unused.
void TempScopInfo::print(raw_ostream &OS, const Module *) const {
  for (const auto &Entry : TempScops)
    Entry.second->print(OS, SE, LI);
}
/// Fetch the required analyses for \p F and build a TempScop for every region
/// reported by the scop detection that passes isMaxRegionInScop().
///
/// \returns false, i.e. the function is reported as unmodified.
bool TempScopInfo::runOnFunction(Function &F) {
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  PDT = &getAnalysis<PostDominatorTree>();
  SE = &getAnalysis<ScalarEvolution>();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  SD = &getAnalysis<ScopDetection>();
  AA = &getAnalysis<AliasAnalysis>();
  TD = &F.getParent()->getDataLayout();

  // Zero of the pointer-sized integer type; used as offset of scalar accesses.
  ZeroOffset = SE->getConstant(TD->getIntPtrType(F.getContext()), 0);

  for (ScopDetection::iterator It = SD->begin(), End = SD->end(); It != End;
       ++It) {
    const Region *Detected = *It;

    // Check the region again right before working on it.
    // NOTE(review): presumably this guards against an earlier SCoP having
    // made the SCEV expressions of this one less precise -- confirm against
    // ScopDetection::isMaxRegionInScop().
    if (SD->isMaxRegionInScop(*Detected)) {
      Region *R = const_cast<Region *>(Detected);
      TempScops.insert(std::make_pair(R, buildTempScop(*R)));
    }
  }

  return false;
}
/// Declare the analyses this pass depends on.
///
/// The dominator tree, post-dominator tree, loop info, scalar evolution and
/// scop detection are required transitively; the pass identified by
/// IndependentBlocksID and the alias analysis are plain requirements. All
/// analyses are marked as preserved.
void TempScopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequiredTransitive<PostDominatorTree>();
AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.addRequiredTransitive<ScalarEvolution>();
AU.addRequiredTransitive<ScopDetection>();
AU.addRequiredID(IndependentBlocksID);
AU.addRequired<AliasAnalysis>();
AU.setPreservesAll();
}
/// Release everything owned by the pass by delegating to clear().
TempScopInfo::~TempScopInfo() { clear(); }
/// Drop all state collected by the pass: the block conditions, the access
/// function map and the TempScops.
void TempScopInfo::clear() {
BBConds.clear();
AccFuncMap.clear();
// The map stores raw TempScop pointers; delete the pointees before the map
// itself is emptied.
DeleteContainerSeconds(TempScops);
TempScops.clear();
}
//===----------------------------------------------------------------------===//
// TempScop information extraction pass implementation
// Definition of the static pass identifier member.
char TempScopInfo::ID = 0;
/// Factory function: returns a newly allocated TempScopInfo pass.
Pass *polly::createTempScopInfoPass() { return new TempScopInfo(); }
// Pass initialization for TempScopInfo: argument string "polly-analyze-ir",
// the description below, and the listed analysis dependencies. Both trailing
// flags are false.
// NOTE(review): ScopDetection is required in getAnalysisUsage() but is not
// listed as a dependency here -- confirm whether that is intentional.
INITIALIZE_PASS_BEGIN(TempScopInfo, "polly-analyze-ir",
"Polly - Analyse the LLVM-IR in the detected regions",
false, false);
INITIALIZE_AG_DEPENDENCY(AliasAnalysis);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(PostDominatorTree);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution);
INITIALIZE_PASS_END(TempScopInfo, "polly-analyze-ir",
"Polly - Analyse the LLVM-IR in the detected regions",
false, false)