//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/IslExprBuilder.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "isl/ast.h"
#include <deque>

using namespace llvm;
using namespace polly;

static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

bool PollyDebugPrinting;
static cl::opt<bool, true> DebugPrintingX(
    "polly-codegen-add-debug-printing",
    cl::desc("Add printf calls that show the values loaded/stored."),
    cl::location(PollyDebugPrinting), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> TraceStmts(
    "polly-codegen-trace-stmts",
    cl::desc("Add printf calls that print the statement being executed"),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> TraceScalars(
    "polly-codegen-trace-scalars",
    cl::desc("Add printf calls that print the values of all scalar values "
             "used in a statement. Requires -polly-codegen-trace-stmts."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

BlockGenerator::BlockGenerator(
    PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
    AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
    ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
    : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
      EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
      GlobalMap(GlobalMap), StartBlock(StartBlock) {}

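// Try to synthesize a new value for `Old` at the current insert point by
// expanding its SCEV expression, with loop induction variables rewritten
// according to LTS. Returns nullptr if `Old` has no computable SCEV here.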
Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                             ValueMapT &BBMap,
                                             LoopToScevMapT &LTS,
                                             Loop *L) const {
  if (!SE.isSCEVable(Old->getType()))
    return nullptr;

  const SCEV *Scev = SE.getSCEVAtScope(Old, L);
  if (!Scev)
    return nullptr;

  if (isa<SCEVCouldNotCompute>(Scev))
    return nullptr;

  const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);

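  // Combine the per-statement and global value maps for the expander. Note
  // that DenseMap::insert does not overwrite existing keys, so entries from
  // BBMap take precedence over entries from GlobalMap.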
  ValueMapT VTV;
  VTV.insert(BBMap.begin(), BBMap.end());
  VTV.insert(GlobalMap.begin(), GlobalMap.end());

  Scop &S = *Stmt.getParent();
  const DataLayout &DL = S.getFunction().getParent()->getDataLayout();
  auto IP = Builder.GetInsertPoint();

  assert(IP != Builder.GetInsertBlock()->end() &&
         "Only instructions can be insert points for SCEVExpander");
  Value *Expanded =
      expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
                    StartBlock->getSinglePredecessor());

  BBMap[Old] = Expanded;
  return Expanded;
}

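// Return the code-generated counterpart of `Old` for use inside `Stmt`,
// dispatching on how `Old` is used: copied values come from BBMap, hoisted
// and preloaded values from GlobalMap, and synthesizable values are
// recomputed on demand.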
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                                   LoopToScevMapT &LTS, Loop *L) const {

  auto lookupGlobally = [this](Value *Old) -> Value * {
    Value *New = GlobalMap.lookup(Old);
    if (!New)
      return nullptr;

    // Required by:
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_pass_only_needed.ll
    // * Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll
    // GlobalMap should be a mapping from (value in original SCoP) to (copied
    // value in generated SCoP), without intermediate mappings, which might
    // easily require transitiveness as well.
    if (Value *NewRemapped = GlobalMap.lookup(New))
      New = NewRemapped;

    // No test case for this code.
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  };

  Value *New = nullptr;
  auto VUse = VirtualUse::create(&Stmt, L, Old, true);
  switch (VUse.getKind()) {
  case VirtualUse::Block:
    // BasicBlocks are constants, but the BlockGenerator copies them.
    New = BBMap.lookup(Old);
    break;

  case VirtualUse::Constant:
    // Used by:
    // * Isl/CodeGen/OpenMP/reference-argument-from-non-affine-region.ll
    // Constants should not be redefined. In this case, the GlobalMap just
    // contains a mapping to the same constant, which is unnecessary, but
    // harmless.
    if ((New = lookupGlobally(Old)))
      break;

    assert(!BBMap.count(Old));
    New = Old;
    break;

  case VirtualUse::ReadOnly:
    assert(!GlobalMap.count(Old));

    // Required for:
    // * Isl/CodeGen/MemAccess/create_arrays.ll
    // * Isl/CodeGen/read-only-scalars.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // For some reason these reload a read-only value. The reloaded value ends
    // up in BBMap, but its value should be identical.
    //
    // Required for:
    // * Isl/CodeGen/OpenMP/single_loop_with_param.ll
    // The parallel subfunctions need to reference the read-only value from the
    // parent function; this is done by reloading them locally.
    if ((New = BBMap.lookup(Old)))
      break;

    New = Old;
    break;

  case VirtualUse::Synthesizable:
    // Used by:
    // * Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll
    // * Isl/CodeGen/OpenMP/recomputed-srem.ll
    // * Isl/CodeGen/OpenMP/reference-other-bb.ll
    // * Isl/CodeGen/OpenMP/two-parallel-loops-reference-outer-indvar.ll
    // For some reason synthesizable values end up in GlobalMap. Their values
    // are the same as trySynthesizeNewValue would return. The legacy
    // implementation prioritized GlobalMap, so this is what we do here as
    // well. Ideally, synthesizable values should not end up in GlobalMap.
    if ((New = lookupGlobally(Old)))
      break;

    // Required for:
    // * Isl/CodeGen/RuntimeDebugBuilder/combine_different_values.ll
    // * Isl/CodeGen/getNumberOfIterations.ll
    // * Isl/CodeGen/non_affine_float_compare.ll
    // * ScheduleOptimizer/pattern-matching-based-opts_10.ll
    // Ideally, synthesizable values are synthesized by trySynthesizeNewValue,
    // not precomputed (SCEVExpander has its own caching mechanism).
    // These tests fail without this, but I think trySynthesizeNewValue would
    // just re-synthesize the same instructions.
    if ((New = BBMap.lookup(Old)))
      break;

    New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L);
    break;

  case VirtualUse::Hoisted:
    // TODO: Hoisted invariant loads should be found in GlobalMap only, but not
    // redefined locally (which will be ignored anyway). That is, the following
    // assertion should apply: assert(!BBMap.count(Old))

    New = lookupGlobally(Old);
    break;

  case VirtualUse::Intra:
  case VirtualUse::Inter:
    assert(!GlobalMap.count(Old) &&
           "Intra and inter-stmt values are never global");
    New = BBMap.lookup(Old);
    break;
  }
  assert(New && "Unexpected scalar dependence in region!");
  return New;
}

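// Clone a single instruction into the generated statement, remapping every
// operand through getNewValue and registering the copy in BBMap.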
void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. In the current state, they just crash the code
  // generation as the metadata operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      NewInst->deleteValue();
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  // When copying the instruction onto the Module meant for the GPU,
  // debug metadata attached to an instruction causes all related
  // metadata to be pulled into the Module. This includes the DICompileUnit,
  // which will not be listed in llvm.dbg.cu of the Module since the Module
  // doesn't contain one. This fails the verification of the Module and the
  // subsequent generation of the ASM string.
  if (NewInst->getModule() != Inst->getModule())
    NewInst->setDebugLoc(llvm::DebugLoc());

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

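// Compute the address accessed by `Inst` in the generated code, based on the
// (possibly modified) access relation attached to the statement.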
Value *
BlockGenerator::generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &MA = Stmt.getArrayAccessFor(Inst);
  return generateLocationAccessed(
      Stmt, getLoopForStmt(Stmt),
      Inst.isNull() ? nullptr : Inst.getPointerOperand(), BBMap, LTS,
      NewAccesses, MA.getId().release(), MA.getAccessValue()->getType());
}

Value *BlockGenerator::generateLocationAccessed(
    ScopStmt &Stmt, Loop *L, Value *Pointer, ValueMapT &BBMap,
    LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, __isl_take isl_id *Id,
    Type *ExpectedType) {
  isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, Id);

  if (AccessExpr) {
    AccessExpr = isl_ast_expr_address_of(AccessExpr);
    auto Address = ExprBuilder->create(AccessExpr);

    // Cast the address of this memory access to a pointer type that has the
    // same element type as the original access, but uses the address space of
    // the newly generated pointer.
    auto OldPtrTy = ExpectedType->getPointerTo();
    auto NewPtrTy = Address->getType();
    OldPtrTy = PointerType::get(OldPtrTy->getElementType(),
                                NewPtrTy->getPointerAddressSpace());

    if (OldPtrTy != NewPtrTy)
      Address = Builder.CreateBitOrPointerCast(Address, OldPtrTy);
    return Address;
  }

  assert(
      Pointer &&
      "If expression was not generated, must use the original pointer value");
  return getNewValue(Stmt, Pointer, BBMap, LTS, L);
}

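// Return the address of an implicit access: accesses mapped to an array are
// materialized from their access relation, while all other (scalar) accesses
// use the alloca associated with their ScopArrayInfo.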
Value *
BlockGenerator::getImplicitAddress(MemoryAccess &Access, Loop *L,
                                   LoopToScevMapT &LTS, ValueMapT &BBMap,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Access.isLatestArrayKind())
    return generateLocationAccessed(*Access.getStatement(), L, nullptr, BBMap,
                                    LTS, NewAccesses, Access.getId().release(),
                                    Access.getAccessValue()->getType());

  return getOrCreateAlloca(Access);
}

Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const {
  auto *StmtBB = Stmt.getEntryBlock();
  return LI.getLoopFor(StmtBB);
}

Value *BlockGenerator::generateArrayLoad(ScopStmt &Stmt, LoadInst *Load,
                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
                                         isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load))
    return PreloadLoad;

  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, LTS, NewAccesses);
  Value *ScalarLoad = Builder.CreateAlignedLoad(
      NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");

  if (PollyDebugPrinting)
    RuntimeDebugBuilder::createCPUPrinter(Builder, "Load from ", NewPointer,
                                          ": ", ScalarLoad, "\n");

  return ScalarLoad;
}

void BlockGenerator::generateArrayStore(ScopStmt &Stmt, StoreInst *Store,
                                        ValueMapT &BBMap, LoopToScevMapT &LTS,
                                        isl_id_to_ast_expr *NewAccesses) {
  MemoryAccess &MA = Stmt.getArrayAccessFor(Store);
  isl::set AccDom = MA.getAccessRelation().domain();
  std::string Subject = MA.getId().get_name();

  generateConditionalExecution(Stmt, AccDom, Subject.c_str(), [&, this]() {
    Value *NewPointer =
        generateLocationAccessed(Stmt, Store, BBMap, LTS, NewAccesses);
    Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
                                      LTS, getLoopForStmt(Stmt));

    if (PollyDebugPrinting)
      RuntimeDebugBuilder::createCPUPrinter(Builder, "Store to ", NewPointer,
                                            ": ", ValueOperand, "\n");

    Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment());
  });
}

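// Return true if `Inst` need not be copied because SCEV can re-synthesize it
// at its uses; for region statements this additionally requires that the
// surrounding loop is not part of the region.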
bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) {
  Loop *L = getLoopForStmt(Stmt);
  return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
         canSynthesize(Inst, *Stmt.getParent(), &SE, L);
}

void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                     ValueMapT &BBMap, LoopToScevMapT &LTS,
                                     isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions direct the control flow. They are explicitly
  // expressed in the generated AST and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Synthesizable instructions will be generated on demand.
  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    Value *NewLoad = generateArrayLoad(Stmt, Load, BBMap, LTS, NewAccesses);
    // Compute NewLoad before its insertion in BBMap to make the insertion
    // deterministic.
    BBMap[Load] = NewLoad;
    return;
  }

  if (auto *Store = dyn_cast<StoreInst>(Inst)) {
    // Identified as redundant by -polly-simplify.
    if (!Stmt.getArrayAccessOrNULLFor(Store))
      return;

    generateArrayStore(Stmt, Store, BBMap, LTS, NewAccesses);
    return;
  }

  if (auto *PHI = dyn_cast<PHINode>(Inst)) {
    copyPHIInstruction(Stmt, PHI, BBMap, LTS);
    return;
  }

  // Skip some special intrinsics for which we do not adjust the semantics to
  // the new schedule. All others are handled like every other instruction.
  if (isIgnoredIntrinsic(Inst))
    return;

  copyInstScalar(Stmt, Inst, BBMap, LTS);
}

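// Remove trivially dead instructions from the just-generated block. The block
// is scanned backwards and the scan restarts after every erasure; BBMap
// entries referring to a removed instruction are dropped as well.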
void BlockGenerator::removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap) {
  auto NewBB = Builder.GetInsertBlock();
  for (auto I = NewBB->rbegin(); I != NewBB->rend(); I++) {
    Instruction *NewInst = &*I;

    if (!isInstructionTriviallyDead(NewInst))
      continue;

    for (auto Pair : BBMap)
      if (Pair.second == NewInst) {
        BBMap.erase(Pair.first);
      }

    NewInst->eraseFromParent();
    I = NewBB->rbegin();
  }
}

void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "Only block statements can be copied by the block generator");

  ValueMapT BBMap;

  BasicBlock *BB = Stmt.getBasicBlock();
  copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
  removeDeadInstructions(BB, BBMap);
}

BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  return CopyBB;
}

BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
                                   isl_id_to_ast_expr *NewAccesses) {
  BasicBlock *CopyBB = splitBB(BB);
  Builder.SetInsertPoint(&CopyBB->front());
  generateScalarLoads(Stmt, LTS, BBMap, NewAccesses);
  generateBeginStmtTrace(Stmt, LTS, BBMap);

  copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);

  // After a basic block was copied, store all scalars that escape this block
  // in their allocas.
  generateScalarStores(Stmt, LTS, BBMap, NewAccesses);
  return CopyBB;
}

void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                            ValueMapT &BBMap, LoopToScevMapT &LTS,
                            isl_id_to_ast_expr *NewAccesses) {
  EntryBB = &CopyBB->getParent()->getEntryBlock();

  // Block statements and the entry blocks of region statements are code
  // generated from instruction lists. This allows us to optimize the
  // instructions that belong to a certain scop statement. As the code
  // structure of region statements might be arbitrarily complex, optimizing
  // the instruction list is not yet supported.
  if (Stmt.isBlockStmt() || (Stmt.isRegionStmt() && Stmt.getEntryBlock() == BB))
    for (Instruction *Inst : Stmt.getInstructions())
      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses);
  else
    for (Instruction &Inst : *BB)
      copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
}

Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) {
  assert(!Access.isLatestArrayKind() && "Trying to get alloca for array kind");

  return getOrCreateAlloca(Access.getLatestScopArrayInfo());
}

Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  assert(!Array->isArrayKind() && "Trying to get alloca for array kind");

  auto &Addr = ScalarMap[Array];

  if (Addr) {
    // Allow allocas to be (temporarily) redirected once by adding a new
    // old-alloca-addr to new-addr mapping to GlobalMap. This functionality
    // is used for example by the OpenMP code generation where a first use
    // of a scalar while still in the host code allocates a normal alloca with
    // getOrCreateAlloca. When the values of this scalar are accessed during
    // the generation of the parallel subfunction, these values are copied over
    // to the parallel subfunction and each request for a scalar alloca slot
    // must be forwarded to the temporary in-subfunction slot. This mapping is
    // removed when the subfunction has been generated and again normal host
    // code is generated. Due to the following reasons it is not possible to
    // perform the GlobalMap lookup right after creating the alloca below, but
    // instead we need to check GlobalMap at each call to getOrCreateAlloca:
    //
    //   1) GlobalMap may be changed multiple times (for each parallel loop),
    //   2) The temporary mapping is commonly only known after the initial
    //      alloca has already been generated, and
    //   3) The original alloca value must be restored after leaving the
    //      sub-function.
    if (Value *NewAddr = GlobalMap.lookup(&*Addr))
      return NewAddr;
    return Addr;
  }

  Type *Ty = Array->getElementType();
  Value *ScalarBase = Array->getBasePtr();
  std::string NameExt;
  if (Array->isPHIKind())
    NameExt = ".phiops";
  else
    NameExt = ".s2a";

  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();

  Addr = new AllocaInst(Ty, DL.getAllocaAddrSpace(),
                        ScalarBase->getName() + NameExt);
  EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
  Addr->insertBefore(&*EntryBB->getFirstInsertionPt());

  return Addr;
}

void BlockGenerator::handleOutsideUsers(const Scop &S, ScopArrayInfo *Array) {
  Instruction *Inst = cast<Instruction>(Array->getBasePtr());

  // If there are escape users we get the alloca for this instruction and put
  // it in the EscapeMap for later finalization. Lastly, if the instruction was
  // copied multiple times we already did this and can exit.
  if (EscapeMap.count(Inst))
    return;

  EscapeUserVectorTy EscapeUsers;
  for (User *U : Inst->users()) {

    // Non-instruction user will never escape.
    Instruction *UI = dyn_cast<Instruction>(U);
    if (!UI)
      continue;

    if (S.contains(UI))
      continue;

    EscapeUsers.push_back(UI);
  }

  // Exit if no escape uses were found.
  if (EscapeUsers.empty())
    return;

  // Get or create an escape alloca for this instruction.
  auto *ScalarAddr = getOrCreateAlloca(Array);

  // Remember that this instruction has escape uses and the escape alloca.
  EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
}
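
// Reload all scalar (zero-dimensional) values read by 'Stmt' from their
// demoted stack slots and make them available in BBMap before the statement
// body is copied.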
void BlockGenerator::generateScalarLoads(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isWrite())
      continue;

#ifndef NDEBUG
    auto StmtDom =
        Stmt.getDomain().intersect_params(Stmt.getParent()->getContext());
    auto AccDom = MA->getAccessRelation().domain();
    assert(!StmtDom.is_subset(AccDom).is_false() &&
           "Scalar must be loaded in all statement instances");
#endif

    auto *Address =
        getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
    assert((!isa<Instruction>(Address) ||
            DT.dominates(cast<Instruction>(Address)->getParent(),
                         Builder.GetInsertBlock())) &&
           "Domination violation");
    BBMap[MA->getAccessValue()] =
        Builder.CreateLoad(Address, Address->getName() + ".reload");
  }
}
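
// Build a runtime i1 condition that is true iff the current statement
// instance, expressed in schedule coordinates, is contained in 'Subdomain'.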
Value *BlockGenerator::buildContainsCondition(ScopStmt &Stmt,
                                              const isl::set &Subdomain) {
  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule();
  USchedule = USchedule.intersect_domain(Domain);

  assert(!USchedule.is_empty());
  isl::map Schedule = isl::map::from_union_map(USchedule);

  isl::set ScheduledDomain = Schedule.range();
  isl::set ScheduledSet = Subdomain.apply(Schedule);

  isl::ast_build RestrictedBuild = AstBuild.restrict(ScheduledDomain);

  isl::ast_expr IsInSet = RestrictedBuild.expr_from(ScheduledSet);
  Value *IsInSetExpr = ExprBuilder->create(IsInSet.copy());
  IsInSetExpr = Builder.CreateICmpNE(
      IsInSetExpr, ConstantInt::get(IsInSetExpr->getType(), 0));

  return IsInSetExpr;
}
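
// Emit the code generated by 'GenThenFunc' under a runtime condition that
// restricts execution to the statement instances contained in 'Subdomain'.
// If the subdomain covers the whole statement domain, no condition is
// generated and the code is emitted unconditionally.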
void BlockGenerator::generateConditionalExecution(
    ScopStmt &Stmt, const isl::set &Subdomain, StringRef Subject,
    const std::function<void()> &GenThenFunc) {
  isl::set StmtDom = Stmt.getDomain();

  // If the condition is a tautology, don't generate a condition around the
  // code.
  bool IsPartialWrite =
      !StmtDom.intersect_params(Stmt.getParent()->getContext())
           .is_subset(Subdomain);
  if (!IsPartialWrite) {
    GenThenFunc();
    return;
  }

  // Generate the condition.
  Value *Cond = buildContainsCondition(Stmt, Subdomain);

  // Don't call GenThenFunc if it is never executed. An ast index expression
  // might not be defined in this case.
  if (auto *Const = dyn_cast<ConstantInt>(Cond))
    if (Const->isZero())
      return;

  BasicBlock *HeadBlock = Builder.GetInsertBlock();
  StringRef BlockName = HeadBlock->getName();

  // Generate the conditional block.
  SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
                            &DT, &LI);
  BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
  BasicBlock *ThenBlock = Branch->getSuccessor(0);
  BasicBlock *TailBlock = Branch->getSuccessor(1);

  // Assign descriptive names.
  if (auto *CondInst = dyn_cast<Instruction>(Cond))
    CondInst->setName("polly." + Subject + ".cond");
  ThenBlock->setName(BlockName + "." + Subject + ".partial");
  TailBlock->setName(BlockName + ".cont");

  // Put the client code into the conditional block and continue in the merge
  // block afterwards.
  Builder.SetInsertPoint(ThenBlock, ThenBlock->getFirstInsertionPt());
  GenThenFunc();
  Builder.SetInsertPoint(TailBlock, TailBlock->getFirstInsertionPt());
}

static std::string getInstName(Value *Val) {
  std::string Result;
  raw_string_ostream OS(Result);
  Val->printAsOperand(OS, false);
  return OS.str();
}
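
// Emit a printf call at the beginning of the generated statement that prints
// the statement name and the coordinate of the executed instance; with
// -polly-codegen-trace-scalars it additionally prints the values of all
// printable scalars used in the statement.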
void BlockGenerator::generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                            ValueMapT &BBMap) {
  if (!TraceStmts)
    return;

  Scop *S = Stmt.getParent();
  const char *BaseName = Stmt.getBaseName();

  isl::ast_build AstBuild = Stmt.getAstBuild();
  isl::set Domain = Stmt.getDomain();

  isl::union_map USchedule = AstBuild.get_schedule().intersect_domain(Domain);
  isl::map Schedule = isl::map::from_union_map(USchedule);
  assert(Schedule.is_empty().is_false() &&
         "The stmt must have a valid instance");

  isl::multi_pw_aff ScheduleMultiPwAff =
      isl::pw_multi_aff::from_map(Schedule.reverse());
  isl::ast_build RestrictedBuild = AstBuild.restrict(Schedule.range());

  // Sequence of strings to print.
  SmallVector<llvm::Value *, 8> Values;

  // Print the name of the statement.
  // TODO: Indent by the depth of the statement instance in the schedule tree.
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, BaseName));
  Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "("));

  // Add the coordinate of the statement instance.
  int DomDims = ScheduleMultiPwAff.dim(isl::dim::out);
  for (int i = 0; i < DomDims; i += 1) {
    if (i > 0)
      Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ","));

    isl::ast_expr IsInSet =
        RestrictedBuild.expr_from(ScheduleMultiPwAff.get_pw_aff(i));
    Values.push_back(ExprBuilder->create(IsInSet.copy()));
  }

  if (TraceScalars) {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")"));
    DenseSet<Instruction *> Encountered;

    // Add the value of each scalar (and the result of PHIs) used in the
    // statement.
    // TODO: Values used in region-statements.
    for (Instruction *Inst : Stmt.insts()) {
      if (!RuntimeDebugBuilder::isPrintable(Inst->getType()))
        continue;

      if (isa<PHINode>(Inst)) {
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, " "));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(
            Builder, getInstName(Inst)));
        Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "="));
        Values.push_back(getNewValue(Stmt, Inst, BBMap, LTS,
                                     LI.getLoopFor(Inst->getParent())));
      } else {
        for (Value *Op : Inst->operand_values()) {
          // Do not print values that cannot change during the execution of the
          // SCoP.
          auto *OpInst = dyn_cast<Instruction>(Op);
          if (!OpInst)
            continue;
          if (!S->contains(OpInst))
            continue;

          // Print each scalar at most once, and exclude values defined in the
          // statement itself.
          if (Encountered.count(OpInst))
            continue;

          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, " "));
          Values.push_back(RuntimeDebugBuilder::getPrintableString(
              Builder, getInstName(OpInst)));
          Values.push_back(
              RuntimeDebugBuilder::getPrintableString(Builder, "="));
          Values.push_back(getNewValue(Stmt, OpInst, BBMap, LTS,
                                       LI.getLoopFor(Inst->getParent())));
          Encountered.insert(OpInst);
        }
      }

      Encountered.insert(Inst);
    }

    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "\n"));
  } else {
    Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ")\n"));
  }

  RuntimeDebugBuilder::createCPUPrinter(Builder, ArrayRef<Value *>(Values));
}
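
// Store all scalar (zero-dimensional) values written by this block statement
// back to their demoted stack slots. Each store is guarded by the domain of
// its access relation, so partial writes only execute for the statement
// instances that actually write.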
void BlockGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  Loop *L = LI.getLoopFor(Stmt.getBasicBlock());

  assert(Stmt.isBlockStmt() &&
         "Region statements need to use the generateScalarStores() function in "
         "the RegionGenerator");

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();

    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *Val = MA->getAccessValue();
          if (MA->isAnyPHIKind()) {
            assert(MA->getIncoming().size() >= 1 &&
                   "Block statements have exactly one exiting block, or "
                   "multiple but with same incoming block and value");
            assert(std::all_of(MA->getIncoming().begin(),
                               MA->getIncoming().end(),
                               [&](std::pair<BasicBlock *, Value *> p) -> bool {
                                 return p.first == Stmt.getBasicBlock();
                               }) &&
                   "Incoming block must be statement's block");
            Val = MA->getIncoming()[0].second;
          }
          auto Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                            BBMap, NewAccesses);

          Val = getNewValue(Stmt, Val, BBMap, LTS, L);
          assert((!isa<Instruction>(Val) ||
                  DT.dominates(cast<Instruction>(Val)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");

          // The new Val might have a different type than the old Val due to
          // ScalarEvolution looking through bitcasts.
          if (Val->getType() != Address->getType()->getPointerElementType())
            Address = Builder.CreateBitOrPointerCast(
                Address, Val->getType()->getPointerTo());

          Builder.CreateStore(Val, Address);
        });
  }
}
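
// Initialize the stack slots of all scalars whose values are defined outside
// the SCoP (or flow into it through an incoming PHI edge) at the beginning of
// the generated region, so the generated code can reload them.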
void BlockGenerator::createScalarInitialization(Scop &S) {
  BasicBlock *ExitBB = S.getExit();
  BasicBlock *PreEntryBB = S.getEnteringBlock();

  Builder.SetInsertPoint(&*StartBlock->begin());

  for (auto &Array : S.arrays()) {
    if (Array->getNumberOfDimensions() != 0)
      continue;
    if (Array->isPHIKind()) {
      // For PHI nodes, the only values we need to store are the ones that
      // reach the PHI node from outside the region. In general there should
      // only be one such incoming edge and this edge should enter through
      // 'PreEntryBB'.
      auto PHI = cast<PHINode>(Array->getBasePtr());

      for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++)
        if (!S.contains(*BI) && *BI != PreEntryBB)
          llvm_unreachable("Incoming edges from outside the scop should always "
                           "come from PreEntryBB");

      int Idx = PHI->getBasicBlockIndex(PreEntryBB);
      if (Idx < 0)
        continue;

      Value *ScalarValue = PHI->getIncomingValue(Idx);

      Builder.CreateStore(ScalarValue, getOrCreateAlloca(Array));
      continue;
    }

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (Inst && S.contains(Inst))
      continue;

    // PHI nodes that are not marked as such in their SAI object are either exit
    // PHI nodes we model as common scalars but without initialization, or
    // incoming phi nodes that need to be initialized. Check if the first is the
    // case for Inst and do not create and initialize memory if so.
    if (auto *PHI = dyn_cast_or_null<PHINode>(Inst))
      if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0)
        continue;

    Builder.CreateStore(Array->getBasePtr(), getOrCreateAlloca(Array));
  }
}
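
// For each escaping instruction, reload its final value from the escape
// alloca in the optimized exit block, merge it with the original value in a
// PHI placed in the merge block, and redirect all outside users to that
// merge PHI.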
void BlockGenerator::createScalarFinalization(Scop &S) {
  // The exit block of the __unoptimized__ region.
  BasicBlock *ExitBB = S.getExitingBlock();
  // The merge block __just after__ the region and the optimized region.
  BasicBlock *MergeBB = S.getExit();

  // The exit block of the __optimized__ region.
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());
  for (const auto &EscapeMapping : EscapeMap) {
    // Extract the escaping instruction and the escaping users as well as the
    // alloca the instruction was demoted to.
    Instruction *EscapeInst = EscapeMapping.first;
    const auto &EscapeMappingValue = EscapeMapping.second;
    const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second;
    Value *ScalarAddr = EscapeMappingValue.first;

    // Reload the demoted instruction in the optimized version of the SCoP.
    Value *EscapeInstReload =
        Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload");
    EscapeInstReload =
        Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType());

    // Create the merge PHI that merges the optimized and unoptimized version.
    PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2,
                                        EscapeInst->getName() + ".merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());

    // Add the respective values to the merge PHI.
    MergePHI->addIncoming(EscapeInstReload, OptExitBB);
    MergePHI->addIncoming(EscapeInst, ExitBB);

    // The information of scalar evolution about the escaping instruction needs
    // to be revoked so the new merged instruction will be used.
    if (SE.isSCEVable(EscapeInst->getType()))
      SE.forgetValue(EscapeInst);

    // Replace all uses of the demoted instruction with the merge PHI.
    for (Instruction *EUser : EscapeUsers)
      EUser->replaceUsesOfWith(EscapeInst, MergePHI);
  }
}
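
// Scan all zero-dimensional (scalar) arrays whose base pointer is an
// instruction defined inside the SCoP and register their outside users via
// handleOutsideUsers().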
void BlockGenerator::findOutsideUsers(Scop &S) {
  for (auto &Array : S.arrays()) {

    if (Array->getNumberOfDimensions() != 0)
      continue;

    if (Array->isPHIKind())
      continue;

    auto *Inst = dyn_cast<Instruction>(Array->getBasePtr());

    if (!Inst)
      continue;

    // Scop invariant hoisting moves some of the base pointers out of the scop.
    // We can ignore these, as the invariant load hoisting already registers the
    // relevant outside users.
    if (!S.contains(Inst))
      continue;

    handleOutsideUsers(S, Array);
  }
}
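
// For SCoPs without a single exit edge, exit PHIs receive their value through
// a demoted stack slot. Reload that slot in the optimized exit block and merge
// it with the original incoming value in a new PHI, so the exit PHI sees the
// correct value regardless of which code version was executed.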
void BlockGenerator::createExitPHINodeMerges(Scop &S) {
  if (S.hasSingleExitEdge())
    return;

  auto *ExitBB = S.getExitingBlock();
  auto *MergeBB = S.getExit();
  auto *AfterMergeBB = MergeBB->getSingleSuccessor();
  BasicBlock *OptExitBB = *(pred_begin(MergeBB));
  if (OptExitBB == ExitBB)
    OptExitBB = *(++pred_begin(MergeBB));

  Builder.SetInsertPoint(OptExitBB->getTerminator());

  for (auto &SAI : S.arrays()) {
    auto *Val = SAI->getBasePtr();

    // Only Value-like scalars need a merge PHI. Exit block PHIs receive either
    // the original PHI's value or the reloaded incoming values from the
    // generated code. An llvm::Value is merged between the original code's
    // value and the generated one.
    if (!SAI->isExitPHIKind())
      continue;

    PHINode *PHI = dyn_cast<PHINode>(Val);
    if (!PHI)
      continue;

    if (PHI->getParent() != AfterMergeBB)
      continue;

    std::string Name = PHI->getName();
    Value *ScalarAddr = getOrCreateAlloca(SAI);
    Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload");
    Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType());
    Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB);
    assert((!isa<Instruction>(OriginalValue) ||
            cast<Instruction>(OriginalValue)->getParent() != MergeBB) &&
           "Original value must not be one we just generated.");
    auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge");
    MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt());
    MergePHI->addIncoming(Reload, OptExitBB);
    MergePHI->addIncoming(OriginalValue, ExitBB);
    int Idx = PHI->getBasicBlockIndex(MergeBB);
    PHI->setIncomingValue(Idx, MergePHI);
  }
}
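
// Drop all ScalarEvolution information that refers to instructions of the
// copied statements, including the loops surrounding escape users, as the
// generated code may compute these values differently.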
void BlockGenerator::invalidateScalarEvolution(Scop &S) {
  for (auto &Stmt : S)
    if (Stmt.isCopyStmt())
      continue;
    else if (Stmt.isBlockStmt())
      for (auto &Inst : *Stmt.getBasicBlock())
        SE.forgetValue(&Inst);
    else if (Stmt.isRegionStmt())
      for (auto *BB : Stmt.getRegion()->blocks())
        for (auto &Inst : *BB)
          SE.forgetValue(&Inst);
    else
      llvm_unreachable("Unexpected statement type found");

  // Invalidate SCEV of loops surrounding the EscapeUsers.
  for (const auto &EscapeMapping : EscapeMap) {
    const EscapeUserVectorTy &EscapeUsers = EscapeMapping.second.second;
    for (Instruction *EUser : EscapeUsers) {
      if (Loop *L = LI.getLoopFor(EUser->getParent()))
        while (L) {
          SE.forgetLoop(L);
          L = L->getParentLoop();
        }
    }
  }
}
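
// Run the finalization steps (escape handling, scalar initialization, exit
// PHI merges, scalar finalization, SCEV invalidation) after all statements
// of the SCoP have been generated.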
void BlockGenerator::finalizeSCoP(Scop &S) {
  findOutsideUsers(S);
  createScalarInitialization(S);
  createExitPHINodeMerges(S);
  createScalarFinalization(S);
  invalidateScalarEvolution(S);
}

VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), VLTS(VLTS), Schedule(Schedule) {
  assert(Schedule && "No statement domain provided");
}
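
// Return a vector value for 'Old'. If no vectorized version is available yet,
// build one by inserting the per-lane scalar values into a new vector.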
Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
                                            ValueMapT &VectorMap,
                                            VectorValueMapT &ScalarMaps,
                                            Loop *L) {
  if (Value *NewValue = VectorMap.lookup(Old))
    return NewValue;

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}
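
// Generate a single wide load for a consecutive (stride-one) access. For
// NegativeStride (stride minus one) accesses, load the touched range and
// reverse the lanes with a shufflevector.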
Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
                                               VLTS[Offset], NewAccesses);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}
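
// Generate a load for a stride-zero access: load the single scalar value once
// and splat it across all vector lanes.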
Value *VectorBlockGenerator::generateStrideZeroLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  auto *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}
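
// Generate a load for an access with unknown stride: emit one scalar load per
// lane and insert the results into a vector.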
Value *VectorBlockGenerator::generateUnknownStrideLoad(
    ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  int VectorWidth = getVectorWidth();
  auto *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[i],
                                                 VLTS[i], NewAccesses);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}
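
// Vectorize a load by dispatching on the access stride: splat preloaded
// (invariant-hoisted) values, otherwise use the stride-zero, stride-one,
// reverse stride-one, or unknown-stride strategy. Types that are invalid as
// vector elements are loaded as scalars, once per lane.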
void VectorBlockGenerator::generateLoad(
    ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  if (Value *PreloadLoad = GlobalMap.lookup(Load)) {
    VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad,
                                                Load->getName() + "_p");
    return;
  }

  if (!VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateArrayLoad(Stmt, Load, ScalarMaps[i], VLTS[i], NewAccesses);
    return;
  }

  const MemoryAccess &Access = Stmt.getArrayAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  Value *NewLoad;
  if (Access.isStrideZero(isl::manage_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0], NewAccesses);
  else if (Access.isStrideOne(isl::manage_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses);
  else if (Access.isStrideX(isl::manage_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, NewAccesses, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps, NewAccesses);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
                                     ScalarMaps, getLoopForStmt(Stmt));

  assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");

  const CastInst *Cast = dyn_cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Loop *L = getLoopForStmt(Stmt);
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
  NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}
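
// Vectorize a store: for stride-one accesses emit a single wide store,
// otherwise extract each lane and emit one scalar store per lane.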
void VectorBlockGenerator::copyStore(
    ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  const MemoryAccess &Access = Stmt.getArrayAccessFor(Store);

  auto *Pointer = Store->getPointerOperand();
  Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
                                 ScalarMaps, getLoopForStmt(Stmt));

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Store, VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl::manage_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
    Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
                                                 VLTS[0], NewAccesses);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      Store->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[i],
                                                   VLTS[i], NewAccesses);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Value *Operand : Inst->operands())
    if (VectorMap.count(Operand))
      return true;
  return false;
}
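
// For each operand of 'Inst' that already has a vector version, extract the
// per-lane scalar values into ScalarMaps. Returns true if any operand was
// vectorized.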
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
                                    VLTS[VectorLane], NewAccesses);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return VLTS.size(); }
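
// Copy a single instruction into the vectorized code. Loads, stores, casts,
// and binary operators get dedicated vector code; everything else is
// scalarized, i.e., copied once per vector lane.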
void VectorBlockGenerator::copyInstruction(
    ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
    VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (canSyntheziseInStmt(Stmt, Inst))
    return;

  if (auto *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps, NewAccesses);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (auto *Store = dyn_cast<StoreInst>(Inst)) {
      // Identified as redundant by -polly-simplify.
      if (!Stmt.getArrayAccessOrNULLFor(Store))
        return;

      copyStore(Stmt, Store, VectorMap, ScalarMaps, NewAccesses);
      return;
    }

    if (auto *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (auto *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses);
}

void VectorBlockGenerator::generateScalarVectorLoads(
    ScopStmt &Stmt, ValueMapT &VectorBlockMap) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isWrite())
      continue;

    auto *Address = getOrCreateAlloca(*MA);
    Type *VectorPtrType = getVectorPtrTy(Address, 1);
    Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
                                             Address->getName() + "_p_vec_p");
    auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
    Constant *SplatVector = Constant::getNullValue(
        VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

    Value *VectorVal = Builder.CreateShuffleVector(
        Val, Val, SplatVector, Address->getName() + "_p_splat");
    VectorBlockMap[MA->getAccessValue()] = VectorVal;
  }
}

void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) {
  for (MemoryAccess *MA : Stmt) {
    if (MA->isArrayKind() || MA->isRead())
      continue;

    llvm_unreachable("Scalar stores not expected in vector loop");
  }
}
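
// Vectorize a whole block statement: split off a new 'polly.stmt.*' block,
// preload the scalar reads as splat vectors, and copy each instruction of the
// original block.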
void VectorBlockGenerator::copyStmt(
    ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.isBlockStmt() &&
         "TODO: Only block statements can be copied by the vector block "
         "generator");

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  &*Builder.GetInsertPoint(), &DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(&CopyBB->front());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time), or, if the values are calculated using scalar operations, it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  generateScalarVectorLoads(Stmt, VectorBlockMap);

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses);

  verifyNoScalarStores(Stmt);
}

BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB,
                                             BasicBlock *BBCopy) {

  BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock();
  BasicBlock *BBCopyIDom = EndBlockMap.lookup(BBIDom);

  if (BBCopyIDom)
    DT.changeImmediateDominator(BBCopy, BBCopyIDom);

  return StartBlockMap.lookup(BBIDom);
}

// This is to determine whether an llvm::Value (defined in @p BB) is usable when
// leaving a subregion. The straight-forward DT.dominates(BB, R->getExitBlock())
// does not work in cases where the exit block has edges from outside the
// region. In that case the llvm::Value would never be usable in the exit
// block. The RegionGenerator however creates a new exit block ('ExitBBCopy')
// for the subregion's exiting edges only. We need to determine whether an
// llvm::Value is usable in there. We do this by checking whether it dominates
// all exiting blocks individually.
static bool isDominatingSubregionExit(const DominatorTree &DT, Region *R,
                                      BasicBlock *BB) {
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    if (!DT.dominates(BB, ExitingBB))
      return false;
  }

  return true;
}

// Find the direct dominator of the subregion's exit block if the subregion was
// simplified.
static BasicBlock *findExitDominator(DominatorTree &DT, Region *R) {
  BasicBlock *Common = nullptr;
  for (auto ExitingBB : predecessors(R->getExit())) {
    // Check for non-subregion incoming edges.
    if (!R->contains(ExitingBB))
      continue;

    // First exiting edge.
    if (!Common) {
      Common = ExitingBB;
      continue;
    }

    Common = DT.findNearestCommonDominator(Common, ExitingBB);
  }

  assert(Common && R->contains(Common));
  return Common;
}
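
// Copy a non-affine region statement by walking its blocks in breadth-first
// order, splitting and copying each block, repairing dominance, and keeping
// per-block value maps so PHI operands can be patched once their incoming
// blocks have been generated.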
void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                               isl_id_to_ast_expr *IdToAstExp) {
  assert(Stmt.isRegionStmt() &&
         "Only region statements can be copied by the region generator");

  // Forget all old mappings.
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();

  // Collection of all values related to this subregion.
  ValueMapT ValueMap;

  // The region represented by the statement.
  Region *R = Stmt.getRegion();

  // Create a dedicated entry for the region where we can reload all demoted
  // inputs.
  BasicBlock *EntryBB = R->getEntry();
  BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                       &*Builder.GetInsertPoint(), &DT, &LI);
  EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
  Builder.SetInsertPoint(&EntryBBCopy->front());

  ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
  generateScalarLoads(Stmt, LTS, EntryBBMap, IdToAstExp);
  generateBeginStmtTrace(Stmt, LTS, EntryBBMap);
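
  // Predecessors outside the region enter through the new entry block copy;
  // record that mapping in both the start and the end block maps.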
  for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
    if (!R->contains(*PI)) {
      StartBlockMap[*PI] = EntryBBCopy;
      EndBlockMap[*PI] = EntryBBCopy;
    }

  // Iterate over all blocks in the region in a breadth-first search.
  std::deque<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 8> SeenBlocks;
  Blocks.push_back(EntryBB);
  SeenBlocks.insert(EntryBB);

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.front();
    Blocks.pop_front();

    // First split the block and update dominance information.
    BasicBlock *BBCopy = splitBB(BB);
    BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy);

    // Get the mapping for this block and initialize it with either the scalar
    // loads from the generated entering block (which dominates all blocks of
    // this subregion) or the maps of the immediate dominator, if part of the
    // subregion. The latter necessarily includes the former.
    ValueMapT *InitBBMap;
    if (BBCopyIDom) {
      assert(RegionMaps.count(BBCopyIDom));
      InitBBMap = &RegionMaps[BBCopyIDom];
    } else
      InitBBMap = &EntryBBMap;
    auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap));
    ValueMapT &RegionMap = Inserted.first->second;

    // Copy the block with the BlockGenerator.
    Builder.SetInsertPoint(&BBCopy->front());
    copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);

    // In order to remap PHI nodes we also store basic block mappings.
    StartBlockMap[BB] = BBCopy;
    EndBlockMap[BB] = Builder.GetInsertBlock();

    // Add values to incomplete PHI nodes waiting for this block to be copied.
    for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB])
      addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS);
    IncompletePHINodeMap[BB].clear();

    // And continue with new successors inside the region.
    for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
      if (R->contains(*SI) && SeenBlocks.insert(*SI))
        Blocks.push_back(*SI);

    // Remember the values in case they are visible after this subregion.
    if (isDominatingSubregionExit(DT, R, BB))
      ValueMap.insert(RegionMap.begin(), RegionMap.end());
  }
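
  // At this point ValueMap holds the mappings of all blocks that dominate
  // every exiting block; only those values are safe to reference from the new
  // exit block created below.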

  // Now create a new dedicated region exit block and add it to the region map.
  BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(),
                                      &*Builder.GetInsertPoint(), &DT, &LI);
  ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit");
  StartBlockMap[R->getExit()] = ExitBBCopy;
  EndBlockMap[R->getExit()] = ExitBBCopy;

  BasicBlock *ExitDomBBCopy = EndBlockMap.lookup(findExitDominator(DT, R));
  assert(ExitDomBBCopy &&
         "Common exit dominator must be within region; at least the entry node "
         "must match");
  DT.changeImmediateDominator(ExitBBCopy, ExitDomBBCopy);

  // As the block generator doesn't handle control flow, we need to add the
  // region control flow by hand after all blocks have been copied.
  for (BasicBlock *BB : SeenBlocks) {
    BasicBlock *BBCopyStart = StartBlockMap[BB];
    BasicBlock *BBCopyEnd = EndBlockMap[BB];
    Instruction *TI = BB->getTerminator();
    if (isa<UnreachableInst>(TI)) {
      while (!BBCopyEnd->empty())
        BBCopyEnd->begin()->eraseFromParent();
      new UnreachableInst(BBCopyEnd->getContext(), BBCopyEnd);
      continue;
    }

    Instruction *BICopy = BBCopyEnd->getTerminator();

    ValueMapT &RegionMap = RegionMaps[BBCopyStart];
    RegionMap.insert(StartBlockMap.begin(), StartBlockMap.end());

    Builder.SetInsertPoint(BICopy);
    copyInstScalar(Stmt, TI, RegionMap, LTS);
    BICopy->eraseFromParent();
  }

  // Add counting PHI nodes to all loops in the region that can be used as
  // replacements for SCEVs referring to the old loop.
  for (BasicBlock *BB : SeenBlocks) {
    Loop *L = LI.getLoopFor(BB);
    if (L == nullptr || L->getHeader() != BB || !R->contains(L))
      continue;

    BasicBlock *BBCopy = StartBlockMap[BB];
    Value *NullVal = Builder.getInt32(0);
    PHINode *LoopPHI =
        PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv");
    Instruction *LoopPHIInc = BinaryOperator::CreateAdd(
        LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc");
    LoopPHI->insertBefore(&BBCopy->front());
    LoopPHIInc->insertBefore(BBCopy->getTerminator());

    for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) {
      if (!R->contains(PredBB))
        continue;
      if (L->contains(PredBB))
        LoopPHI->addIncoming(LoopPHIInc, EndBlockMap[PredBB]);
      else
        LoopPHI->addIncoming(NullVal, EndBlockMap[PredBB]);
    }

    for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy)))
      if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0)
        LoopPHI->addIncoming(NullVal, PredBBCopy);

    LTS[L] = SE.getUnknown(LoopPHI);
  }
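
  // For a copied loop header, the generated IR looks roughly like this
  // (a sketch only; the actual value names depend on the input):
  //   %polly.subregion.iv     = phi i32 [ 0, %entering ],
  //                                     [ %polly.subregion.iv.inc, %latch ]
  //   %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1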

  // Continue generating code in the exit block.
  Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt());

  // Write values visible to other statements.
  generateScalarStores(Stmt, LTS, ValueMap, IdToAstExp);
  StartBlockMap.clear();
  EndBlockMap.clear();
  RegionMaps.clear();
  IncompletePHINodeMap.clear();
}
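
// Create a PHI node in the new subregion exit that merges the values a PHI
// write receives from each exiting block. If the subregion was simplified
// after the ScopStmts were created, the node is placed in the copy of the
// former exiting block instead.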
PHINode *RegionGenerator::buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS,
                                       ValueMapT &BBMap, Loop *L) {
  ScopStmt *Stmt = MA->getStatement();
  Region *SubR = Stmt->getRegion();
  auto Incoming = MA->getIncoming();

  PollyIRBuilder::InsertPointGuard IPGuard(Builder);
  PHINode *OrigPHI = cast<PHINode>(MA->getAccessInstruction());
  BasicBlock *NewSubregionExit = Builder.GetInsertBlock();

  // This can happen if the subregion is simplified after the ScopStmts
  // have been created; simplification happens as part of CodeGeneration.
  if (OrigPHI->getParent() != SubR->getExit()) {
    BasicBlock *FormerExit = SubR->getExitingBlock();
    if (FormerExit)
      NewSubregionExit = StartBlockMap.lookup(FormerExit);
  }

  PHINode *NewPHI = PHINode::Create(OrigPHI->getType(), Incoming.size(),
                                    "polly." + OrigPHI->getName(),
                                    NewSubregionExit->getFirstNonPHI());

  // Add the incoming values to the PHI.
  for (auto &Pair : Incoming) {
    BasicBlock *OrigIncomingBlock = Pair.first;
    BasicBlock *NewIncomingBlockStart = StartBlockMap.lookup(OrigIncomingBlock);
    BasicBlock *NewIncomingBlockEnd = EndBlockMap.lookup(OrigIncomingBlock);
    Builder.SetInsertPoint(NewIncomingBlockEnd->getTerminator());
    assert(RegionMaps.count(NewIncomingBlockStart));
    assert(RegionMaps.count(NewIncomingBlockEnd));
    ValueMapT *LocalBBMap = &RegionMaps[NewIncomingBlockStart];

    Value *OrigIncomingValue = Pair.second;
    Value *NewIncomingValue =
        getNewValue(*Stmt, OrigIncomingValue, *LocalBBMap, LTS, L);
    NewPHI->addIncoming(NewIncomingValue, NewIncomingBlockEnd);
  }

  return NewPHI;
}
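
// Compute the value a scalar or PHI write leaving the subregion should store:
// for PHI-kind accesses the incoming values are merged (via buildExitPHI)
// unless there is only a single incoming value; for value-kind accesses the
// copied value itself is used, as it must dominate the exit block.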
Value *RegionGenerator::getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS,
                                      ValueMapT &BBMap) {
  ScopStmt *Stmt = MA->getStatement();

  // TODO: Add some test cases that ensure this is really the right choice.
  Loop *L = LI.getLoopFor(Stmt->getRegion()->getExit());

  if (MA->isAnyPHIKind()) {
    auto Incoming = MA->getIncoming();
    assert(!Incoming.empty() &&
           "PHI WRITEs must originate from at least one incoming block");

    // If there is only one incoming value, we do not need to create a PHI.
    if (Incoming.size() == 1) {
      Value *OldVal = Incoming[0].second;
      return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
    }

    return buildExitPHI(MA, LTS, BBMap, L);
  }

  // MemoryKind::Value accesses leaving the subregion must dominate the exit
  // block; just pass the copied value.
  Value *OldVal = MA->getAccessValue();
  return getNewValue(*Stmt, OldVal, BBMap, LTS, L);
}
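
// Write back all scalar and PHI values this region statement defines that are
// visible to other statements. The exit values are computed up front because
// the conditionally executed stores emitted below may introduce new basic
// blocks.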
void RegionGenerator::generateScalarStores(
    ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
    __isl_keep isl_id_to_ast_expr *NewAccesses) {
  assert(Stmt.getRegion() &&
         "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");

  // Get the exit scalar values before generating the writes.
  // This is necessary because RegionGenerator::getExitScalar may insert
  // PHINodes that depend on the region's exiting blocks. But
  // BlockGenerator::generateConditionalExecution may insert a new basic block
  // such that the current basic block is not a direct successor of the exiting
  // blocks anymore. Hence, build the PHINodes while the current block is still
  // the direct successor.
  SmallDenseMap<MemoryAccess *, Value *> NewExitScalars;
  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    Value *NewVal = getExitScalar(MA, LTS, BBMap);
    NewExitScalars[MA] = NewVal;
  }

  for (MemoryAccess *MA : Stmt) {
    if (MA->isOriginalArrayKind() || MA->isRead())
      continue;

    isl::set AccDom = MA->getAccessRelation().domain();
    std::string Subject = MA->getId().get_name();
    generateConditionalExecution(
        Stmt, AccDom, Subject.c_str(), [&, this, MA]() {
          Value *NewVal = NewExitScalars.lookup(MA);
          assert(NewVal && "The exit scalar must be determined before");
          Value *Address = getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS,
                                              BBMap, NewAccesses);
          assert((!isa<Instruction>(NewVal) ||
                  DT.dominates(cast<Instruction>(NewVal)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          assert((!isa<Instruction>(Address) ||
                  DT.dominates(cast<Instruction>(Address)->getParent(),
                               Builder.GetInsertBlock())) &&
                 "Domination violation");
          Builder.CreateStore(NewVal, Address);
        });
  }
}
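
// Add the operand from the incoming block @p IncomingBB to @p PHICopy. If
// that block has not been copied yet, the PHI is queued in
// IncompletePHINodeMap and completed once the block's copy becomes available.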
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, PHINode *PHI,
                                      PHINode *PHICopy, BasicBlock *IncomingBB,
                                      LoopToScevMapT &LTS) {
  // If the incoming block was not yet copied, mark this PHI as incomplete.
  // Once the block is copied, the incoming value will be added.
  BasicBlock *BBCopyStart = StartBlockMap[IncomingBB];
  BasicBlock *BBCopyEnd = EndBlockMap[IncomingBB];
  if (!BBCopyStart) {
    assert(!BBCopyEnd);
    assert(Stmt.represents(IncomingBB) &&
           "Bad incoming block for PHI in non-affine region");
    IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy));
    return;
  }

  assert(RegionMaps.count(BBCopyStart) &&
         "Incoming PHI block did not have a BBMap");
  ValueMapT &BBCopyMap = RegionMaps[BBCopyStart];

  Value *OpCopy = nullptr;

  if (Stmt.represents(IncomingBB)) {
    Value *Op = PHI->getIncomingValueForBlock(IncomingBB);

    // If the current insert block is different from the PHI's incoming block,
    // change it; otherwise do not.
    auto IP = Builder.GetInsertPoint();
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(BBCopyEnd->getTerminator());
    OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForStmt(Stmt));
    if (IP->getParent() != BBCopyEnd)
      Builder.SetInsertPoint(&*IP);
  } else {
    // All edges from outside the non-affine region become a single edge
    // in the new copy of the non-affine region. Make sure to only add the
    // corresponding edge the first time we encounter a basic block from
    // outside the non-affine region.
    if (PHICopy->getBasicBlockIndex(BBCopyEnd) >= 0)
      return;

    // Get the reloaded value.
    OpCopy = getNewValue(Stmt, PHI, BBCopyMap, LTS, getLoopForStmt(Stmt));
  }

  assert(OpCopy && "Incoming PHI value was not copied properly");
  PHICopy->addIncoming(OpCopy, BBCopyEnd);
}
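
// Create the copy of a PHI node from the non-affine region, register it in
// @p BBMap, and add (or defer) one incoming operand per incoming block.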
void RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI,
                                         ValueMapT &BBMap,
                                         LoopToScevMapT &LTS) {
  unsigned NumIncoming = PHI->getNumIncomingValues();
  PHINode *PHICopy =
      Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName());
  PHICopy->moveBefore(PHICopy->getParent()->getFirstNonPHI());
  BBMap[PHI] = PHICopy;

  for (BasicBlock *IncomingBB : PHI->blocks())
    addOperandToPHI(Stmt, PHI, PHICopy, IncomingBB, LTS);
}