forked from OSchip/llvm-project
398 lines
17 KiB
C++
398 lines
17 KiB
C++
//===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements logic for three optimization passes. The first two
|
|
// passes try to move alloc nodes out of blocks to reduce the number of
|
|
// allocations and copies during buffer deallocation. The third pass tries to
|
|
// convert heap-based allocations to stack-based allocations, if possible.
|
|
|
|
#include "PassDetail.h"
|
|
#include "mlir/IR/Operation.h"
|
|
#include "mlir/Interfaces/LoopLikeInterface.h"
|
|
#include "mlir/Pass/Pass.h"
|
|
#include "mlir/Transforms/BufferUtils.h"
|
|
#include "mlir/Transforms/Passes.h"
|
|
|
|
using namespace mlir;
|
|
|
|
/// Returns true if the given operation implements a known high-level region-
|
|
/// based control-flow interface.
|
|
static bool isKnownControlFlowInterface(Operation *op) {
|
|
return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
|
|
}
|
|
|
|
/// Check if the size of the allocation is less than the given size. The
|
|
/// transformation is only applied to small buffers since large buffers could
|
|
/// exceed the stack space.
|
|
static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
|
|
unsigned bitwidthOfIndexType,
|
|
unsigned maxRankOfAllocatedMemRef) {
|
|
auto type = alloc.getType().dyn_cast<ShapedType>();
|
|
if (!type || !alloc.getDefiningOp<AllocOp>())
|
|
return false;
|
|
if (!type.hasStaticShape()) {
|
|
// Check if the dynamic shape dimension of the alloc is produced by RankOp.
|
|
// If this is the case, it is likely to be small. Furthermore, the dimension
|
|
// is limited to the maximum rank of the allocated memref to avoid large
|
|
// values by multiplying several small values.
|
|
if (type.getRank() <= maxRankOfAllocatedMemRef) {
|
|
return llvm::all_of(
|
|
alloc.getDefiningOp()->getOperands(),
|
|
[&](Value operand) { return operand.getDefiningOp<RankOp>(); });
|
|
}
|
|
return false;
|
|
}
|
|
// For index types, use the provided size, as the type does not know.
|
|
unsigned int bitwidth = type.getElementType().isIndex()
|
|
? bitwidthOfIndexType
|
|
: type.getElementTypeBitWidth();
|
|
return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
|
|
}
|
|
|
|
/// Checks whether the given aliases leave the allocation scope.
|
|
static bool
|
|
leavesAllocationScope(Region *parentRegion,
|
|
const BufferAliasAnalysis::ValueSetT &aliases) {
|
|
for (Value alias : aliases) {
|
|
for (auto *use : alias.getUsers()) {
|
|
// If there is at least one alias that leaves the parent region, we know
|
|
// that this alias escapes the whole region and hence the associated
|
|
// allocation leaves allocation scope.
|
|
if (use->hasTrait<OpTrait::ReturnLike>() &&
|
|
use->getParentRegion() == parentRegion)
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// Checks, if an automated allocation scope for a given alloc value exists.
|
|
static bool hasAllocationScope(Value alloc,
|
|
const BufferAliasAnalysis &aliasAnalysis) {
|
|
Region *region = alloc.getParentRegion();
|
|
do {
|
|
if (Operation *parentOp = region->getParentOp()) {
|
|
// Check if the operation is an automatic allocation scope and whether an
|
|
// alias leaves the scope. This means, an allocation yields out of
|
|
// this scope and can not be transformed in a stack-based allocation.
|
|
if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
|
|
!leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
|
|
return true;
|
|
// Check if the operation is a known control flow interface and break the
|
|
// loop to avoid transformation in loops. Furthermore skip transformation
|
|
// if the operation does not implement a RegionBeanchOpInterface.
|
|
if (BufferPlacementTransformationBase::isLoop(parentOp) ||
|
|
!isKnownControlFlowInterface(parentOp))
|
|
break;
|
|
}
|
|
} while ((region = region->getParentRegion()));
|
|
return false;
|
|
}
|
|
|
|
namespace {
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// BufferAllocationHoisting
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// A base implementation compatible with the `BufferAllocationHoisting` class.
|
|
struct BufferAllocationHoistingStateBase {
|
|
/// A pointer to the current dominance info.
|
|
DominanceInfo *dominators;
|
|
|
|
/// The current allocation value.
|
|
Value allocValue;
|
|
|
|
/// The current placement block (if any).
|
|
Block *placementBlock;
|
|
|
|
/// Initializes the state base.
|
|
BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue,
|
|
Block *placementBlock)
|
|
: dominators(dominators), allocValue(allocValue),
|
|
placementBlock(placementBlock) {}
|
|
};
|
|
|
|
/// Implements the actual hoisting logic for allocation nodes.
|
|
template <typename StateT>
|
|
class BufferAllocationHoisting : public BufferPlacementTransformationBase {
|
|
public:
|
|
BufferAllocationHoisting(Operation *op)
|
|
: BufferPlacementTransformationBase(op), dominators(op),
|
|
postDominators(op) {}
|
|
|
|
/// Moves allocations upwards.
|
|
void hoist() {
|
|
for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
|
|
Value allocValue = std::get<0>(entry);
|
|
Operation *definingOp = allocValue.getDefiningOp();
|
|
assert(definingOp && "No defining op");
|
|
auto operands = definingOp->getOperands();
|
|
auto resultAliases = aliases.resolve(allocValue);
|
|
// Determine the common dominator block of all aliases.
|
|
Block *dominatorBlock =
|
|
findCommonDominator(allocValue, resultAliases, dominators);
|
|
// Init the initial hoisting state.
|
|
StateT state(&dominators, allocValue, allocValue.getParentBlock());
|
|
// Check for additional allocation dependencies to compute an upper bound
|
|
// for hoisting.
|
|
Block *dependencyBlock = nullptr;
|
|
if (!operands.empty()) {
|
|
// If this node has dependencies, check all dependent nodes with respect
|
|
// to a common post dominator. This ensures that all dependency values
|
|
// have been computed before allocating the buffer.
|
|
ValueSetT dependencies(std::next(operands.begin()), operands.end());
|
|
dependencyBlock = findCommonDominator(*operands.begin(), dependencies,
|
|
postDominators);
|
|
}
|
|
|
|
// Find the actual placement block and determine the start operation using
|
|
// an upper placement-block boundary. The idea is that placement block
|
|
// cannot be moved any further upwards than the given upper bound.
|
|
Block *placementBlock = findPlacementBlock(
|
|
state, state.computeUpperBound(dominatorBlock, dependencyBlock));
|
|
Operation *startOperation = BufferPlacementAllocs::getStartOperation(
|
|
allocValue, placementBlock, liveness);
|
|
|
|
// Move the alloc in front of the start operation.
|
|
Operation *allocOperation = allocValue.getDefiningOp();
|
|
allocOperation->moveBefore(startOperation);
|
|
}
|
|
}
|
|
|
|
private:
|
|
/// Finds a valid placement block by walking upwards in the CFG until we
|
|
/// either cannot continue our walk due to constraints (given by the StateT
|
|
/// implementation) or we have reached the upper-most dominator block.
|
|
Block *findPlacementBlock(StateT &state, Block *upperBound) {
|
|
Block *currentBlock = state.placementBlock;
|
|
// Walk from the innermost regions/loops to the outermost regions/loops and
|
|
// find an appropriate placement block that satisfies the constraint of the
|
|
// current StateT implementation. Walk until we reach the upperBound block
|
|
// (if any).
|
|
|
|
// If we are not able to find a valid parent operation or an associated
|
|
// parent block, break the walk loop.
|
|
Operation *parentOp;
|
|
Block *parentBlock;
|
|
while ((parentOp = currentBlock->getParentOp()) &&
|
|
(parentBlock = parentOp->getBlock()) &&
|
|
(!upperBound ||
|
|
dominators.properlyDominates(upperBound, currentBlock))) {
|
|
// Try to find an immediate dominator and check whether the parent block
|
|
// is above the immediate dominator (if any).
|
|
DominanceInfoNode *idom = dominators.getNode(currentBlock)->getIDom();
|
|
if (idom && dominators.properlyDominates(parentBlock, idom->getBlock())) {
|
|
// If the current immediate dominator is below the placement block, move
|
|
// to the immediate dominator block.
|
|
currentBlock = idom->getBlock();
|
|
state.recordMoveToDominator(currentBlock);
|
|
} else {
|
|
// We have to move to our parent block since an immediate dominator does
|
|
// either not exist or is above our parent block. If we cannot move to
|
|
// our parent operation due to constraints given by the StateT
|
|
// implementation, break the walk loop. Furthermore, we should not move
|
|
// allocations out of unknown region-based control-flow operations.
|
|
if (!isKnownControlFlowInterface(parentOp) ||
|
|
!state.isLegalPlacement(parentOp))
|
|
break;
|
|
// Move to our parent block by notifying the current StateT
|
|
// implementation.
|
|
currentBlock = parentBlock;
|
|
state.recordMoveToParent(currentBlock);
|
|
}
|
|
}
|
|
// Return the finally determined placement block.
|
|
return state.placementBlock;
|
|
}
|
|
|
|
/// The dominator info to find the appropriate start operation to move the
|
|
/// allocs.
|
|
DominanceInfo dominators;
|
|
|
|
/// The post dominator info to move the dependent allocs in the right
|
|
/// position.
|
|
PostDominanceInfo postDominators;
|
|
|
|
/// The map storing the final placement blocks of a given alloc value.
|
|
llvm::DenseMap<Value, Block *> placementBlocks;
|
|
};
|
|
|
|
/// A state implementation compatible with the `BufferAllocationHoisting` class
|
|
/// that hoists allocations into dominator blocks while keeping them inside of
|
|
/// loops.
|
|
struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase {
|
|
using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
|
|
|
|
/// Computes the upper bound for the placement block search.
|
|
Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
|
|
// If we do not have a dependency block, the upper bound is given by the
|
|
// dominator block.
|
|
if (!dependencyBlock)
|
|
return dominatorBlock;
|
|
|
|
// Find the "lower" block of the dominator and the dependency block to
|
|
// ensure that we do not move allocations above this block.
|
|
return dominators->properlyDominates(dominatorBlock, dependencyBlock)
|
|
? dependencyBlock
|
|
: dominatorBlock;
|
|
}
|
|
|
|
/// Returns true if the given operation does not represent a loop.
|
|
bool isLegalPlacement(Operation *op) {
|
|
return !BufferPlacementTransformationBase::isLoop(op);
|
|
}
|
|
|
|
/// Sets the current placement block to the given block.
|
|
void recordMoveToDominator(Block *block) { placementBlock = block; }
|
|
|
|
/// Sets the current placement block to the given block.
|
|
void recordMoveToParent(Block *block) { recordMoveToDominator(block); }
|
|
};
|
|
|
|
/// A state implementation compatible with the `BufferAllocationHoisting` class
|
|
/// that hoists allocations out of loops.
|
|
struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
|
|
using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase;
|
|
|
|
/// Remembers the dominator block of all aliases.
|
|
Block *aliasDominatorBlock;
|
|
|
|
/// Computes the upper bound for the placement block search.
|
|
Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) {
|
|
aliasDominatorBlock = dominatorBlock;
|
|
// If there is a dependency block, we have to use this block as an upper
|
|
// bound to satisfy all allocation value dependencies.
|
|
return dependencyBlock ? dependencyBlock : nullptr;
|
|
}
|
|
|
|
/// Returns true if the given operation represents a loop and one of the
|
|
/// aliases caused the `aliasDominatorBlock` to be "above" the block of the
|
|
/// given loop operation. If this is the case, it indicates that the
|
|
/// allocation is passed via a back edge.
|
|
bool isLegalPlacement(Operation *op) {
|
|
return BufferPlacementTransformationBase::isLoop(op) &&
|
|
!dominators->dominates(aliasDominatorBlock, op->getBlock());
|
|
}
|
|
|
|
/// Does not change the internal placement block, as we want to move
|
|
/// operations out of loops only.
|
|
void recordMoveToDominator(Block *block) {}
|
|
|
|
/// Sets the current placement block to the given block.
|
|
void recordMoveToParent(Block *block) { placementBlock = block; }
|
|
};
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// BufferPlacementPromotion
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Promotes heap-based allocations to stack-based allocations (if possible).
|
|
class BufferPlacementPromotion : BufferPlacementTransformationBase {
|
|
public:
|
|
BufferPlacementPromotion(Operation *op)
|
|
: BufferPlacementTransformationBase(op) {}
|
|
|
|
/// Promote buffers to stack-based allocations.
|
|
void promote(unsigned maximumSize, unsigned bitwidthOfIndexType,
|
|
unsigned maxRankOfAllocatedMemRef) {
|
|
for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
|
|
Value alloc = std::get<0>(entry);
|
|
Operation *dealloc = std::get<1>(entry);
|
|
// Checking several requirements to transform an AllocOp into an AllocaOp.
|
|
// The transformation is done if the allocation is limited to a given
|
|
// size. Furthermore, a deallocation must not be defined for this
|
|
// allocation entry and a parent allocation scope must exist.
|
|
if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType,
|
|
maxRankOfAllocatedMemRef) ||
|
|
dealloc || !hasAllocationScope(alloc, aliases))
|
|
continue;
|
|
|
|
Operation *startOperation = BufferPlacementAllocs::getStartOperation(
|
|
alloc, alloc.getParentBlock(), liveness);
|
|
// Build a new alloca that is associated with its parent
|
|
// `AutomaticAllocationScope` determined during the initialization phase.
|
|
OpBuilder builder(startOperation);
|
|
Operation *allocOp = alloc.getDefiningOp();
|
|
Operation *alloca = builder.create<AllocaOp>(
|
|
alloc.getLoc(), alloc.getType().cast<MemRefType>(),
|
|
allocOp->getOperands());
|
|
|
|
// Replace the original alloc by a newly created alloca.
|
|
allocOp->replaceAllUsesWith(alloca);
|
|
allocOp->erase();
|
|
}
|
|
}
|
|
};
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// BufferOptimizationPasses
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// The buffer hoisting pass that hoists allocation nodes into dominating
|
|
/// blocks.
|
|
struct BufferHoistingPass : BufferHoistingBase<BufferHoistingPass> {
|
|
|
|
void runOnFunction() override {
|
|
// Hoist all allocations into dominator blocks.
|
|
BufferAllocationHoisting<BufferAllocationHoistingState> optimizer(
|
|
getFunction());
|
|
optimizer.hoist();
|
|
}
|
|
};
|
|
|
|
/// The buffer loop hoisting pass that hoists allocation nodes out of loops.
|
|
struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
|
|
|
|
void runOnFunction() override {
|
|
// Hoist all allocations out of loops.
|
|
BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(
|
|
getFunction());
|
|
optimizer.hoist();
|
|
}
|
|
};
|
|
|
|
/// The promote buffer to stack pass that tries to convert alloc nodes into
|
|
/// alloca nodes.
|
|
struct PromoteBuffersToStackPass
|
|
: PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
|
|
|
|
PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
|
|
unsigned bitwidthOfIndexType,
|
|
unsigned maxRankOfAllocatedMemRef) {
|
|
this->maxAllocSizeInBytes = maxAllocSizeInBytes;
|
|
this->bitwidthOfIndexType = bitwidthOfIndexType;
|
|
this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
|
|
}
|
|
|
|
void runOnFunction() override {
|
|
// Move all allocation nodes and convert candidates into allocas.
|
|
BufferPlacementPromotion optimizer(getFunction());
|
|
optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType,
|
|
this->maxRankOfAllocatedMemRef);
|
|
}
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
std::unique_ptr<Pass> mlir::createBufferHoistingPass() {
|
|
return std::make_unique<BufferHoistingPass>();
|
|
}
|
|
|
|
std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
|
|
return std::make_unique<BufferLoopHoistingPass>();
|
|
}
|
|
|
|
std::unique_ptr<Pass>
|
|
mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
|
|
unsigned bitwidthOfIndexType,
|
|
unsigned maxRankOfAllocatedMemRef) {
|
|
return std::make_unique<PromoteBuffersToStackPass>(
|
|
maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
|
|
}
|