forked from OSchip/llvm-project
Replace usages of Instruction with Operation in the Transforms/ directory.
PiperOrigin-RevId: 240636130
This commit is contained in:
parent 3518122e86
commit 99b87c9707
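
The change is mechanical: the `using Instruction = Operation;` alias is deleted from the headers (first hunk below), and every use of the alias is respelled as `Operation`. A minimal illustration of the pattern, with invented function names, is:

// Before: legal only because of the `using Instruction = Operation;` alias.
void processInst(Instruction *inst);

// After: the alias is gone, so code names the type directly.
void processOp(Operation *op);
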
@@ -33,7 +33,6 @@ class Block;
 class FuncBuilder;
 class MLIRContext;
 class Operation;
-using Instruction = Operation;
 class Type;
 class Value;
 
@@ -43,7 +42,7 @@ class FunctionConversion;
 }
 
 /// Base class for the dialect op conversion patterns. Specific conversions
-/// must derive this class and implement `PatternMatch match(Instruction *)`
+/// must derive this class and implement `PatternMatch match(Operation *)`
 /// defined in `Pattern` and at least one of `rewrite` and `rewriteTerminator`.
 //
 // TODO(zinenko): this should eventually converge with RewritePattern. So far,
@@ -67,7 +66,7 @@ public:
 /// DialectOpConversion ever needs to replace an operation that does not have
 /// successors. This function should not fail. If some specific cases of the
 /// operation are not supported, these cases should not be matched.
-virtual SmallVector<Value *, 4> rewrite(Instruction *op,
+virtual SmallVector<Value *, 4> rewrite(Operation *op,
 ArrayRef<Value *> operands,
 FuncBuilder &rewriter) const {
 llvm_unreachable("unimplemented rewrite, did you mean rewriteTerminator?");
@@ -85,7 +84,7 @@ public:
 /// successors. This function should not fail the pass. If some specific
 /// cases of the operation are not supported, these cases should not be
 /// matched.
-virtual void rewriteTerminator(Instruction *op,
+virtual void rewriteTerminator(Operation *op,
 ArrayRef<Value *> properOperands,
 ArrayRef<Block *> destinations,
 ArrayRef<ArrayRef<Value *>> operands,

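For orientation, a conversion pattern written against this interface now overrides the Operation-based hooks shown above. The following sketch is illustrative only and not part of the commit (the class name is invented, and the `match` override documented above is omitted); it simply forwards the already-remapped operands as the replacement values:

struct PassThroughOpConversion : public DialectOpConversion {
  using DialectOpConversion::DialectOpConversion;

  // Signature as documented in the hunks above: the matched op, its operands
  // already remapped to converted values, and a FuncBuilder for creating
  // replacement ops.
  SmallVector<Value *, 4> rewrite(Operation *op, ArrayRef<Value *> operands,
                                  FuncBuilder &rewriter) const override {
    // Trivial "conversion": reuse the remapped operands as the values that
    // stand in for the original op's results.
    return SmallVector<Value *, 4>(operands.begin(), operands.end());
  }
};
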
@@ -34,10 +34,10 @@ class Function;
 class FuncBuilder;
 class Value;
 
-/// Unrolls this for instruction completely if the trip count is known to be
+/// Unrolls this for operation completely if the trip count is known to be
 /// constant. Returns failure otherwise.
 LogicalResult loopUnrollFull(AffineForOp forOp);
-/// Unrolls this for instruction by the specified unroll factor. Returns failure
+/// Unrolls this for operation by the specified unroll factor. Returns failure
 /// if the loop cannot be unrolled either due to restrictions or due to invalid
 /// unroll factors.
 LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor);
@@ -73,8 +73,8 @@ void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
 SmallVectorImpl<Value *> *operands,
 FuncBuilder *builder);
 
-/// Skew the instructions in the body of a 'affine.for' instruction with the
-/// specified instruction-wise shifts. The shifts are with respect to the
+/// Skew the operations in the body of a 'affine.for' operation with the
+/// specified operation-wise shifts. The shifts are with respect to the
 /// original execution order, and are multiplied by the loop 'step' before being
 /// applied.
 LLVM_NODISCARD

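As a usage sketch (not from this commit), a pass that wants to unroll every 'affine.for' by a fixed factor could combine the walk API seen elsewhere in this diff with the declaration above; the function name and the factor are illustrative:

// Unroll each 'affine.for' in `f` by 4, ignoring loops that cannot be
// unrolled (loopUnrollByFactor returns failure() for those).
static void unrollAllByFour(Function &f) {
  f.walk([](Operation *op) {
    if (auto forOp = op->dyn_cast<AffineForOp>())
      (void)loopUnrollByFactor(forOp, /*unrollFactor=*/4);
  });
}
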
@@ -37,7 +37,7 @@ public:
 
 FuncBuilder *getBuilder() { return builder; }
 
-Instruction *createOperation(const OperationState &state) override {
+Operation *createOperation(const OperationState &state) override {
 auto *result = builder->createOperation(state);
 return result;
 }
@@ -66,7 +66,7 @@ public:
 /// must override). It will be passed the function-wise state, common to all
 /// matches, and the state returned by the `match` call, if any. The subclass
 /// must use `rewriter` to modify the function.
-virtual void rewriteOpInst(Instruction *op,
+virtual void rewriteOpInst(Operation *op,
 MLFuncGlobalLoweringState *funcWiseState,
 std::unique_ptr<PatternState> opState,
 MLFuncLoweringRewriter *rewriter) const = 0;
@@ -123,14 +123,14 @@ void applyMLPatternsGreedily(
 FuncBuilder builder(f);
 MLFuncLoweringRewriter rewriter(&builder);
 
-llvm::SmallVector<Instruction *, 16> ops;
-f->walk([&ops](Instruction *inst) { ops.push_back(inst); });
+llvm::SmallVector<Operation *, 16> ops;
+f->walk([&ops](Operation *op) { ops.push_back(op); });
 
-for (Instruction *inst : ops) {
+for (Operation *op : ops) {
 for (const auto &pattern : patterns) {
-builder.setInsertionPoint(inst);
-if (auto matchResult = pattern->match(inst)) {
-pattern->rewriteOpInst(inst, funcWiseState, std::move(*matchResult),
+builder.setInsertionPoint(op);
+if (auto matchResult = pattern->match(op)) {
+pattern->rewriteOpInst(op, funcWiseState, std::move(*matchResult),
 &rewriter);
 break;
 }

@@ -82,7 +82,7 @@ FunctionPassBase *createLoopFusionPass(unsigned fastMemorySpace = 0,
 /// memory hierarchy.
 FunctionPassBase *createPipelineDataTransferPass();
 
-/// Lowers affine control flow instructions (ForStmt, IfStmt and AffineApplyOp)
+/// Lowers affine control flow operations (ForStmt, IfStmt and AffineApplyOp)
 /// to equivalent lower-level constructs (flow of basic blocks and arithmetic
 /// primitives).
 FunctionPassBase *createLowerAffinePass();

@@ -73,8 +73,8 @@ bool replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 ArrayRef<Value *> extraIndices = {},
 AffineMap indexRemap = AffineMap(),
 ArrayRef<Value *> extraOperands = {},
-Instruction *domInstFilter = nullptr,
-Instruction *postDomInstFilter = nullptr);
+Operation *domInstFilter = nullptr,
+Operation *postDomInstFilter = nullptr);
 
 /// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
 /// its results equal to the number of operands, as a composition
@@ -83,13 +83,13 @@ bool replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 /// these will also be collected into a single (multi-result) affine apply op.
 /// The final results of the composed AffineApplyOp are returned in output
 /// parameter 'results'. Returns the affine apply op created.
-Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
+Operation *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
 ArrayRef<Value *> operands,
-ArrayRef<Instruction *> affineApplyOps,
+ArrayRef<Operation *> affineApplyOps,
 SmallVectorImpl<Value *> *results);
 
-/// Given an instruction, inserts one or more single result affine apply
-/// operations, results of which are exclusively used by this instruction.
+/// Given an operation, inserts one or more single result affine apply
+/// operations, results of which are exclusively used by this operation.
 /// The operands of these newly created affine apply ops are
 /// guaranteed to be loop iterators or terminal symbols of a function.
 ///
@@ -117,13 +117,13 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
 /// (i.e., there was no affine computation slice to create).
 /// 2. If all the affine.apply op's supplying operands to this opInst did not
 /// have any uses other than those in this opInst.
-void createAffineComputationSlice(Instruction *opInst,
+void createAffineComputationSlice(Operation *opInst,
 SmallVectorImpl<AffineApplyOp> *sliceOps);
 
 /// Replaces (potentially nested) function attributes in the operation "op"
 /// with those specified in "remappingTable".
 void remapFunctionAttrs(
-Instruction &op, const DenseMap<Attribute, FunctionAttr> &remappingTable);
+Operation &op, const DenseMap<Attribute, FunctionAttr> &remappingTable);
 
 /// Replaces (potentially nested) function attributes all operations of the
 /// Function "fn" with those specified in "remappingTable".

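A call-site sketch for the first declaration above (hypothetical variables, all optional arguments left at their defaults): given two memref values, dereferencing uses of the old memref are rewritten to use the new one, and the return value reports whether the replacement succeeded.

// `oldMemRef` and `newMemRef` are assumed to be Value* of compatible memref
// type; no extra indices, no index remapping, no dominance filters.
bool replaced = replaceAllMemRefUsesWith(oldMemRef, newMemRef);
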
@@ -38,11 +38,11 @@ using namespace mlir;
 
 namespace {
 // TODO(riverriddle) Handle commutative operations.
-struct SimpleOperationInfo : public llvm::DenseMapInfo<Instruction *> {
-static unsigned getHashValue(const Instruction *opC) {
-auto *op = const_cast<Instruction *>(opC);
+struct SimpleOperationInfo : public llvm::DenseMapInfo<Operation *> {
+static unsigned getHashValue(const Operation *opC) {
+auto *op = const_cast<Operation *>(opC);
 // Hash the operations based upon their:
-// - Instruction Name
+// - Operation Name
 // - Attributes
 // - Result Types
 // - Operands
@@ -51,9 +51,9 @@ struct SimpleOperationInfo : public llvm::DenseMapInfo<Instruction *> {
 hash_combine_range(op->result_type_begin(), op->result_type_end()),
 hash_combine_range(op->operand_begin(), op->operand_end()));
 }
-static bool isEqual(const Instruction *lhsC, const Instruction *rhsC) {
-auto *lhs = const_cast<Instruction *>(lhsC);
-auto *rhs = const_cast<Instruction *>(rhsC);
+static bool isEqual(const Operation *lhsC, const Operation *rhsC) {
+auto *lhs = const_cast<Operation *>(lhsC);
+auto *rhs = const_cast<Operation *>(rhsC);
 if (lhs == rhs)
 return true;
 if (lhs == getTombstoneKey() || lhs == getEmptyKey() ||
@@ -90,8 +90,8 @@ struct CSE : public FunctionPass<CSE> {
 /// Shared implementation of operation elimination and scoped map definitions.
 using AllocatorTy = llvm::RecyclingAllocator<
 llvm::BumpPtrAllocator,
-llvm::ScopedHashTableVal<Instruction *, Instruction *>>;
-using ScopedMapTy = llvm::ScopedHashTable<Instruction *, Instruction *,
+llvm::ScopedHashTableVal<Operation *, Operation *>>;
+using ScopedMapTy = llvm::ScopedHashTable<Operation *, Operation *,
 SimpleOperationInfo, AllocatorTy>;
 
 /// Represents a single entry in the depth first traversal of a CFG.
@@ -112,7 +112,7 @@ struct CSE : public FunctionPass<CSE> {
 
 /// Attempt to eliminate a redundant operation. Returns true if the operation
 /// was marked for removal, false otherwise.
-bool simplifyOperation(Instruction *op);
+bool simplifyOperation(Operation *op);
 
 void simplifyBlock(DominanceInfo &domInfo, Block *bb);
 void simplifyRegion(DominanceInfo &domInfo, Region &region);
@@ -124,12 +124,12 @@ private:
 ScopedMapTy knownValues;
 
 /// Operations marked as dead and to be erased.
-std::vector<Instruction *> opsToErase;
+std::vector<Operation *> opsToErase;
 };
 } // end anonymous namespace
 
 /// Attempt to eliminate a redundant operation.
-bool CSE::simplifyOperation(Instruction *op) {
+bool CSE::simplifyOperation(Operation *op) {
 // Don't simplify operations with nested blocks. We don't currently model
 // equality comparisons correctly among other things. It is also unclear
 // whether we would want to CSE such operations.

@@ -31,9 +31,9 @@ struct ConstantFold : public FunctionPass<ConstantFold> {
 // All constants in the function post folding.
 SmallVector<Value *, 8> existingConstants;
 // Operations that were folded and that need to be erased.
-std::vector<Instruction *> opInstsToErase;
+std::vector<Operation *> opInstsToErase;
 
-void foldInstruction(Instruction *op);
+void foldOperation(Operation *op);
 void runOnFunction() override;
 };
 } // end anonymous namespace
@@ -41,7 +41,7 @@ struct ConstantFold : public FunctionPass<ConstantFold> {
 /// Attempt to fold the specified operation, updating the IR to match. If
 /// constants are found, we keep track of them in the existingConstants list.
 ///
-void ConstantFold::foldInstruction(Instruction *op) {
+void ConstantFold::foldOperation(Operation *op) {
 // If this operation is already a constant, just remember it for cleanup
 // later, and don't try to fold it.
 if (auto constant = op->dyn_cast<ConstantOp>()) {
@@ -97,15 +97,15 @@ void ConstantFold::runOnFunction() {
 existingConstants.clear();
 opInstsToErase.clear();
 
-getFunction().walk([&](Instruction *inst) { foldInstruction(inst); });
+getFunction().walk([&](Operation *op) { foldOperation(op); });
 
 // At this point, these operations are dead, remove them.
 // TODO: This is assuming that all constant foldable operations have no
 // side effects. When we have side effect modeling, we should verify that
 // the operation is effect-free before we remove it. Until then this is
 // close enough.
-for (auto *inst : opInstsToErase) {
-inst->erase();
+for (auto *op : opInstsToErase) {
+op->erase();
 }
 
 // By the time we are done, we may have simplified a bunch of code, leaving

@@ -50,7 +50,7 @@ private:
 // Utility that looks up a list of value in the value remapping table. Returns
 // an empty vector if one of the values is not mapped yet.
 SmallVector<Value *, 4> lookupValues(
-const llvm::iterator_range<Instruction::operand_iterator> &operands);
+const llvm::iterator_range<Operation::operand_iterator> &operands);
 
 // Converts the given function to the dialect using hooks defined in
 // `dialectConversion`. Returns the converted function or `nullptr` on error.
@@ -61,16 +61,16 @@ private:
 // passes them to `converter->rewriteTerminator` function defined in the
 // pattern, together with `builder`.
 LogicalResult convertOpWithSuccessors(DialectOpConversion *converter,
-Instruction *op, FuncBuilder &builder);
+Operation *op, FuncBuilder &builder);
 
 // Converts an operation without successors. Extracts the converted operands
 // from `valueRemapping` and passes them to the `converter->rewrite` function
 // defined in the pattern, together with `builder`.
-LogicalResult convertOp(DialectOpConversion *converter, Instruction *op,
+LogicalResult convertOp(DialectOpConversion *converter, Operation *op,
 FuncBuilder &builder);
 
-// Converts a block by traversing its instructions sequentially, looking for
-// the first pattern match and dispatching the instruction conversion to
+// Converts a block by traversing its operations sequentially, looking for
+// the first pattern match and dispatching the operation conversion to
 // either `convertOp` or `convertOpWithSuccessors` depending on the presence
 // of successors. If there is no match, clones the operation.
 //
@@ -101,7 +101,7 @@ private:
 } // end namespace mlir
 
 SmallVector<Value *, 4> impl::FunctionConversion::lookupValues(
-const llvm::iterator_range<Instruction::operand_iterator> &operands) {
+const llvm::iterator_range<Operation::operand_iterator> &operands) {
 SmallVector<Value *, 4> remapped;
 remapped.reserve(llvm::size(operands));
 for (Value *operand : operands) {
@@ -114,7 +114,7 @@ SmallVector<Value *, 4> impl::FunctionConversion::lookupValues(
 }
 
 LogicalResult impl::FunctionConversion::convertOpWithSuccessors(
-DialectOpConversion *converter, Instruction *op, FuncBuilder &builder) {
+DialectOpConversion *converter, Operation *op, FuncBuilder &builder) {
 SmallVector<Block *, 2> destinations;
 destinations.reserve(op->getNumSuccessors());
 SmallVector<Value *, 4> operands = lookupValues(op->getOperands());
@@ -146,7 +146,7 @@ LogicalResult impl::FunctionConversion::convertOpWithSuccessors(
 
 LogicalResult
 impl::FunctionConversion::convertOp(DialectOpConversion *converter,
-Instruction *op, FuncBuilder &builder) {
+Operation *op, FuncBuilder &builder) {
 auto operands = lookupValues(op->getOperands());
 assert((!operands.empty() || op->getNumOperands() == 0) &&
 "converting op before ops defining its operands");
@@ -170,22 +170,22 @@ impl::FunctionConversion::convertBlock(Block *block, FuncBuilder &builder,
 builder.setInsertionPointToStart(mapping.lookupOrNull(block));
 
 // Iterate over ops and convert them.
-for (Instruction &inst : *block) {
-if (inst.getNumRegions() != 0) {
-inst.emitError("unsupported region instruction");
+for (Operation &op : *block) {
+if (op.getNumRegions() != 0) {
+op.emitError("unsupported region operation");
 return failure();
 }
 
 // Find the first matching conversion and apply it.
 bool converted = false;
 for (auto *conversion : conversions) {
-if (!conversion->match(&inst))
+if (!conversion->match(&op))
 continue;
 
-if (inst.getNumSuccessors() != 0) {
-if (failed(convertOpWithSuccessors(conversion, &inst, builder)))
+if (op.getNumSuccessors() != 0) {
+if (failed(convertOpWithSuccessors(conversion, &op, builder)))
 return failure();
-} else if (failed(convertOp(conversion, &inst, builder))) {
+} else if (failed(convertOp(conversion, &op, builder))) {
 return failure();
 }
 converted = true;
@@ -193,7 +193,7 @@ impl::FunctionConversion::convertBlock(Block *block, FuncBuilder &builder,
 }
 // If there is no conversion provided for the op, clone the op as is.
 if (!converted)
-builder.clone(inst, mapping);
+builder.clone(op, mapping);
 }
 
 // Recurse to children unless they have been already visited.

@@ -170,7 +170,7 @@ static void getMultiLevelStrides(const MemRefRegion &region,
 /// dynamic shaped memref's for now. `numParamLoopIVs` is the number of
 /// enclosing loop IVs of opInst (starting from the outermost) that the region
 /// is parametric on.
-static bool getFullMemRefAsRegion(Instruction *opInst, unsigned numParamLoopIVs,
+static bool getFullMemRefAsRegion(Operation *opInst, unsigned numParamLoopIVs,
 MemRefRegion *region) {
 unsigned rank;
 if (auto loadOp = opInst->dyn_cast<LoadOp>()) {
@@ -212,11 +212,11 @@ static bool getFullMemRefAsRegion(Instruction *opInst, unsigned numParamLoopIVs,
 }
 
 static void emitNoteForBlock(Block &block, const Twine &message) {
-auto *inst = block.getContainingOp();
-if (!inst) {
+auto *op = block.getContainingOp();
+if (!op) {
 block.getFunction()->emitNote(message);
 } else {
-inst->emitNote(message);
+op->emitNote(message);
 }
 }
 
@@ -350,7 +350,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
 fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);
 
 // Create the fast memory space buffer just before the 'affine.for'
-// instruction.
+// operation.
 fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType).getResult();
 // Record it.
 fastBufferMap[memref] = fastMemRef;
@@ -391,7 +391,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
 top.create<ConstantIndexOp>(loc, strideInfos[0].numEltPerStride);
 }
 
-// Record the last instruction just before the point where we insert the
+// Record the last operation just before the point where we insert the
 // outgoing DMAs. We later do the memref replacement later only in [begin,
 // postDomFilter] so that the original memref's in the DMA ops themselves
 // don't get replaced.
@@ -464,7 +464,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
 }
 
 /// Generate DMAs for this block. The block is partitioned into separate
-/// `regions`; each region is either a sequence of one or more instructions
+/// `regions`; each region is either a sequence of one or more operations
 /// starting and ending with a load or store op, or just a loop (which could
 /// have other loops nested within). Returns false on an error, true otherwise.
 bool DmaGeneration::runOnBlock(Block *block) {
@@ -472,20 +472,19 @@ bool DmaGeneration::runOnBlock(Block *block) {
 return true;
 
 // Every loop in the block starts and ends a region. A contiguous sequence of
-// operation instructions starting and ending with a load/store op is also
+// operations starting and ending with a load/store op is also
 // identified as a region. Straightline code (contiguous chunks of operation
-// instructions) are always assumed to not exhaust memory. As a result, this
+// operations) are always assumed to not exhaust memory. As a result, this
 // approach is conservative in some cases at the moment, we do a check later
 // and report an error with location info.
-// TODO(bondhugula): An 'affine.if' instruction is being treated similar to an
-// operation instruction. 'affine.if''s could have 'affine.for's in them;
+// TODO(bondhugula): An 'affine.if' operation is being treated similar to an
+// operation. 'affine.if''s could have 'affine.for's in them;
 // treat them separately.
 
 // Get to the first load, store, or for op.
 auto curBegin =
-std::find_if(block->begin(), block->end(), [&](Instruction &inst) {
-return inst.isa<LoadOp>() || inst.isa<StoreOp>() ||
-inst.isa<AffineForOp>();
+std::find_if(block->begin(), block->end(), [&](Operation &op) {
+return op.isa<LoadOp>() || op.isa<StoreOp>() || op.isa<AffineForOp>();
 });
 
 for (auto it = curBegin; it != block->end(); ++it) {
@@ -513,7 +512,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
 runOnBlock(/*begin=*/curBegin, /*end=*/it);
 // Recurse onto the body of this loop.
 runOnBlock(forOp.getBody());
-// The next region starts right after the 'affine.for' instruction.
+// The next region starts right after the 'affine.for' operation.
 curBegin = std::next(it);
 } else {
 // We have enough capacity, i.e., DMAs will be computed for the portion
@@ -583,10 +582,10 @@ findHighestBlockForPlacement(const MemRefRegion &region, Block &block,
 }
 }
 
-/// Generates DMAs for a contiguous sequence of instructions in `block` in the
+/// Generates DMAs for a contiguous sequence of operations in `block` in the
 /// iterator range [begin, end). Returns the total size of the DMA buffers used.
 // Since we generate alloc's and dealloc's for all DMA buffers (before and
-// after the range of instructions resp), all of the fast memory capacity is
+// after the range of operations resp), all of the fast memory capacity is
 // assumed to be available.
 uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
 if (begin == end)
@@ -610,8 +609,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
 // To check for errors when walking the block.
 bool error = false;
 
-// Walk this range of instructions to gather all memory regions.
-block->walk(begin, end, [&](Instruction *opInst) {
+// Walk this range of operations to gather all memory regions.
+block->walk(begin, end, [&](Operation *opInst) {
 // Gather regions to allocate to buffers in faster memory space.
 if (auto loadOp = opInst->dyn_cast<LoadOp>()) {
 if (loadOp.getMemRefType().getMemorySpace() != slowMemorySpace)
@@ -738,8 +737,7 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
 return totalDmaBuffersSizeInBytes;
 }
 
-// For a range of operation instructions, a note will be emitted at the
-// caller.
+// For a range of operations, a note will be emitted at the caller.
 AffineForOp forOp;
 uint64_t sizeInKib = llvm::divideCeil(totalDmaBuffersSizeInBytes, 1024);
 if (llvm::DebugFlag && (forOp = begin->dyn_cast<AffineForOp>())) {
@@ -750,8 +748,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
 if (totalDmaBuffersSizeInBytes > fastMemCapacityBytes) {
 StringRef str = "Total size of all DMA buffers' for this block "
 "exceeds fast memory capacity\n";
-if (auto *inst = block->getContainingOp())
-inst->emitError(str);
+if (auto *op = block->getContainingOp())
+op->emitError(str);
 else
 block->getFunction()->emitError(str);
 }

@ -123,26 +123,26 @@ namespace {
|
|||
// operations, and whether or not an IfInst was encountered in the loop nest.
|
||||
struct LoopNestStateCollector {
|
||||
SmallVector<AffineForOp, 4> forOps;
|
||||
SmallVector<Instruction *, 4> loadOpInsts;
|
||||
SmallVector<Instruction *, 4> storeOpInsts;
|
||||
SmallVector<Operation *, 4> loadOpInsts;
|
||||
SmallVector<Operation *, 4> storeOpInsts;
|
||||
bool hasNonForRegion = false;
|
||||
|
||||
void collect(Instruction *instToWalk) {
|
||||
instToWalk->walk([&](Instruction *opInst) {
|
||||
if (opInst->isa<AffineForOp>())
|
||||
forOps.push_back(opInst->cast<AffineForOp>());
|
||||
else if (opInst->getNumRegions() != 0)
|
||||
void collect(Operation *opToWalk) {
|
||||
opToWalk->walk([&](Operation *op) {
|
||||
if (op->isa<AffineForOp>())
|
||||
forOps.push_back(op->cast<AffineForOp>());
|
||||
else if (op->getNumRegions() != 0)
|
||||
hasNonForRegion = true;
|
||||
else if (opInst->isa<LoadOp>())
|
||||
loadOpInsts.push_back(opInst);
|
||||
else if (opInst->isa<StoreOp>())
|
||||
storeOpInsts.push_back(opInst);
|
||||
else if (op->isa<LoadOp>())
|
||||
loadOpInsts.push_back(op);
|
||||
else if (op->isa<StoreOp>())
|
||||
storeOpInsts.push_back(op);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// TODO(b/117228571) Replace when this is modeled through side-effects/op traits
|
||||
static bool isMemRefDereferencingOp(Instruction &op) {
|
||||
static bool isMemRefDereferencingOp(Operation &op) {
|
||||
if (op.isa<LoadOp>() || op.isa<StoreOp>() || op.isa<DmaStartOp>() ||
|
||||
op.isa<DmaWaitOp>())
|
||||
return true;
|
||||
|
@ -150,7 +150,7 @@ static bool isMemRefDereferencingOp(Instruction &op) {
|
|||
}
|
||||
|
||||
// MemRefDependenceGraph is a graph data structure where graph nodes are
|
||||
// top-level instructions in a Function which contain load/store ops, and edges
|
||||
// top-level operations in a Function which contain load/store ops, and edges
|
||||
// are memref dependences between the nodes.
|
||||
// TODO(andydavis) Add a more flexible dependece graph representation.
|
||||
// TODO(andydavis) Add a depth parameter to dependence graph construction.
|
||||
|
@ -163,12 +163,12 @@ public:
|
|||
// The unique identifier of this node in the graph.
|
||||
unsigned id;
|
||||
// The top-level statment which is (or contains) loads/stores.
|
||||
Instruction *inst;
|
||||
Operation *op;
|
||||
// List of load operations.
|
||||
SmallVector<Instruction *, 4> loads;
|
||||
SmallVector<Operation *, 4> loads;
|
||||
// List of store op insts.
|
||||
SmallVector<Instruction *, 4> stores;
|
||||
Node(unsigned id, Instruction *inst) : id(id), inst(inst) {}
|
||||
SmallVector<Operation *, 4> stores;
|
||||
Node(unsigned id, Operation *op) : id(id), op(op) {}
|
||||
|
||||
// Returns the load op count for 'memref'.
|
||||
unsigned getLoadOpCount(Value *memref) {
|
||||
|
@ -192,7 +192,7 @@ public:
|
|||
|
||||
// Returns all store ops in 'storeOps' which access 'memref'.
|
||||
void getStoreOpsForMemref(Value *memref,
|
||||
SmallVectorImpl<Instruction *> *storeOps) {
|
||||
SmallVectorImpl<Operation *> *storeOps) {
|
||||
for (auto *storeOpInst : stores) {
|
||||
if (memref == storeOpInst->cast<StoreOp>().getMemRef())
|
||||
storeOps->push_back(storeOpInst);
|
||||
|
@ -201,7 +201,7 @@ public:
|
|||
|
||||
// Returns all load ops in 'loadOps' which access 'memref'.
|
||||
void getLoadOpsForMemref(Value *memref,
|
||||
SmallVectorImpl<Instruction *> *loadOps) {
|
||||
SmallVectorImpl<Operation *> *loadOps) {
|
||||
for (auto *loadOpInst : loads) {
|
||||
if (memref == loadOpInst->cast<LoadOp>().getMemRef())
|
||||
loadOps->push_back(loadOpInst);
|
||||
|
@ -236,7 +236,7 @@ public:
|
|||
// which contain accesses to the same memref 'value'. If the value is a
|
||||
// non-memref value, then the dependence is between a graph node which
|
||||
// defines an SSA value and another graph node which uses the SSA value
|
||||
// (e.g. a constant instruction defining a value which is used inside a loop
|
||||
// (e.g. a constant operation defining a value which is used inside a loop
|
||||
// nest).
|
||||
Value *value;
|
||||
};
|
||||
|
@ -266,9 +266,9 @@ public:
|
|||
return &it->second;
|
||||
}
|
||||
|
||||
// Adds a node with 'inst' to the graph and returns its unique identifier.
|
||||
unsigned addNode(Instruction *inst) {
|
||||
Node node(nextNodeId++, inst);
|
||||
// Adds a node with 'op' to the graph and returns its unique identifier.
|
||||
unsigned addNode(Operation *op) {
|
||||
Node node(nextNodeId++, op);
|
||||
nodes.insert({node.id, node});
|
||||
return node.id;
|
||||
}
|
||||
|
@ -301,9 +301,9 @@ public:
|
|||
Node *node = getNode(id);
|
||||
for (auto *storeOpInst : node->stores) {
|
||||
auto *memref = storeOpInst->cast<StoreOp>().getMemRef();
|
||||
auto *inst = memref->getDefiningOp();
|
||||
auto *op = memref->getDefiningOp();
|
||||
// Return true if 'memref' is a block argument.
|
||||
if (!inst)
|
||||
if (!op)
|
||||
return true;
|
||||
// Return true if any use of 'memref' escapes the function.
|
||||
for (auto &use : memref->getUses())
|
||||
|
@ -436,50 +436,50 @@ public:
|
|||
return outEdgeCount;
|
||||
}
|
||||
|
||||
// Computes and returns an insertion point instruction, before which the
|
||||
// Computes and returns an insertion point operation, before which the
|
||||
// the fused <srcId, dstId> loop nest can be inserted while preserving
|
||||
// dependences. Returns nullptr if no such insertion point is found.
|
||||
Instruction *getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) {
|
||||
Operation *getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) {
|
||||
if (outEdges.count(srcId) == 0)
|
||||
return getNode(dstId)->inst;
|
||||
return getNode(dstId)->op;
|
||||
|
||||
// Build set of insts in range (srcId, dstId) which depend on 'srcId'.
|
||||
SmallPtrSet<Instruction *, 2> srcDepInsts;
|
||||
SmallPtrSet<Operation *, 2> srcDepInsts;
|
||||
for (auto &outEdge : outEdges[srcId])
|
||||
if (outEdge.id != dstId)
|
||||
srcDepInsts.insert(getNode(outEdge.id)->inst);
|
||||
srcDepInsts.insert(getNode(outEdge.id)->op);
|
||||
|
||||
// Build set of insts in range (srcId, dstId) on which 'dstId' depends.
|
||||
SmallPtrSet<Instruction *, 2> dstDepInsts;
|
||||
SmallPtrSet<Operation *, 2> dstDepInsts;
|
||||
for (auto &inEdge : inEdges[dstId])
|
||||
if (inEdge.id != srcId)
|
||||
dstDepInsts.insert(getNode(inEdge.id)->inst);
|
||||
dstDepInsts.insert(getNode(inEdge.id)->op);
|
||||
|
||||
Instruction *srcNodeInst = getNode(srcId)->inst;
|
||||
Instruction *dstNodeInst = getNode(dstId)->inst;
|
||||
Operation *srcNodeInst = getNode(srcId)->op;
|
||||
Operation *dstNodeInst = getNode(dstId)->op;
|
||||
|
||||
// Computing insertion point:
|
||||
// *) Walk all instruction positions in Block instruction list in the
|
||||
// range (src, dst). For each instruction 'inst' visited in this search:
|
||||
// *) Store in 'firstSrcDepPos' the first position where 'inst' has a
|
||||
// *) Walk all operation positions in Block operation list in the
|
||||
// range (src, dst). For each operation 'op' visited in this search:
|
||||
// *) Store in 'firstSrcDepPos' the first position where 'op' has a
|
||||
// dependence edge from 'srcNode'.
|
||||
// *) Store in 'lastDstDepPost' the last position where 'inst' has a
|
||||
// *) Store in 'lastDstDepPost' the last position where 'op' has a
|
||||
// dependence edge to 'dstNode'.
|
||||
// *) Compare 'firstSrcDepPos' and 'lastDstDepPost' to determine the
|
||||
// instruction insertion point (or return null pointer if no such
|
||||
// operation insertion point (or return null pointer if no such
|
||||
// insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
|
||||
SmallVector<Instruction *, 2> depInsts;
|
||||
SmallVector<Operation *, 2> depInsts;
|
||||
Optional<unsigned> firstSrcDepPos;
|
||||
Optional<unsigned> lastDstDepPos;
|
||||
unsigned pos = 0;
|
||||
for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
|
||||
it != Block::iterator(dstNodeInst); ++it) {
|
||||
Instruction *inst = &(*it);
|
||||
if (srcDepInsts.count(inst) > 0 && firstSrcDepPos == None)
|
||||
Operation *op = &(*it);
|
||||
if (srcDepInsts.count(op) > 0 && firstSrcDepPos == None)
|
||||
firstSrcDepPos = pos;
|
||||
if (dstDepInsts.count(inst) > 0)
|
||||
if (dstDepInsts.count(op) > 0)
|
||||
lastDstDepPos = pos;
|
||||
depInsts.push_back(inst);
|
||||
depInsts.push_back(op);
|
||||
++pos;
|
||||
}
|
||||
|
||||
|
@ -557,8 +557,8 @@ public:
|
|||
}
|
||||
|
||||
// Adds ops in 'loads' and 'stores' to node at 'id'.
|
||||
void addToNode(unsigned id, const SmallVectorImpl<Instruction *> &loads,
|
||||
const SmallVectorImpl<Instruction *> &stores) {
|
||||
void addToNode(unsigned id, const SmallVectorImpl<Operation *> &loads,
|
||||
const SmallVectorImpl<Operation *> &stores) {
|
||||
Node *node = getNode(id);
|
||||
for (auto *loadOpInst : loads)
|
||||
node->loads.push_back(loadOpInst);
|
||||
|
@ -596,7 +596,7 @@ public:
|
|||
continue;
|
||||
assert(nodes.count(edge.id) > 0);
|
||||
// Skip if 'edge.id' is not a loop nest.
|
||||
if (!getNode(edge.id)->inst->isa<AffineForOp>())
|
||||
if (!getNode(edge.id)->op->isa<AffineForOp>())
|
||||
continue;
|
||||
// Visit current input edge 'edge'.
|
||||
callback(edge);
|
||||
|
@ -623,7 +623,7 @@ public:
|
|||
void dump() const { print(llvm::errs()); }
|
||||
};
|
||||
|
||||
// Intializes the data dependence graph by walking instructions in 'f'.
|
||||
// Intializes the data dependence graph by walking operations in 'f'.
|
||||
// Assigns each node in the graph a node id based on program order in 'f'.
|
||||
// TODO(andydavis) Add support for taking a Block arg to construct the
|
||||
// dependence graph at a different depth.
|
||||
|
@ -634,18 +634,18 @@ bool MemRefDependenceGraph::init(Function &f) {
|
|||
if (f.getBlocks().size() != 1)
|
||||
return false;
|
||||
|
||||
DenseMap<Instruction *, unsigned> forToNodeMap;
|
||||
for (auto &inst : f.front()) {
|
||||
if (auto forOp = inst.dyn_cast<AffineForOp>()) {
|
||||
DenseMap<Operation *, unsigned> forToNodeMap;
|
||||
for (auto &op : f.front()) {
|
||||
if (auto forOp = op.dyn_cast<AffineForOp>()) {
|
||||
// Create graph node 'id' to represent top-level 'forOp' and record
|
||||
// all loads and store accesses it contains.
|
||||
LoopNestStateCollector collector;
|
||||
collector.collect(&inst);
|
||||
collector.collect(&op);
|
||||
// Return false if a non 'affine.for' region was found (not currently
|
||||
// supported).
|
||||
if (collector.hasNonForRegion)
|
||||
return false;
|
||||
Node node(nextNodeId++, &inst);
|
||||
Node node(nextNodeId++, &op);
|
||||
for (auto *opInst : collector.loadOpInsts) {
|
||||
node.loads.push_back(opInst);
|
||||
auto *memref = opInst->cast<LoadOp>().getMemRef();
|
||||
|
@ -656,29 +656,29 @@ bool MemRefDependenceGraph::init(Function &f) {
|
|||
auto *memref = opInst->cast<StoreOp>().getMemRef();
|
||||
memrefAccesses[memref].insert(node.id);
|
||||
}
|
||||
forToNodeMap[&inst] = node.id;
|
||||
forToNodeMap[&op] = node.id;
|
||||
nodes.insert({node.id, node});
|
||||
} else if (auto loadOp = inst.dyn_cast<LoadOp>()) {
|
||||
} else if (auto loadOp = op.dyn_cast<LoadOp>()) {
|
||||
// Create graph node for top-level load op.
|
||||
Node node(nextNodeId++, &inst);
|
||||
node.loads.push_back(&inst);
|
||||
auto *memref = inst.cast<LoadOp>().getMemRef();
|
||||
Node node(nextNodeId++, &op);
|
||||
node.loads.push_back(&op);
|
||||
auto *memref = op.cast<LoadOp>().getMemRef();
|
||||
memrefAccesses[memref].insert(node.id);
|
||||
nodes.insert({node.id, node});
|
||||
} else if (auto storeOp = inst.dyn_cast<StoreOp>()) {
|
||||
} else if (auto storeOp = op.dyn_cast<StoreOp>()) {
|
||||
// Create graph node for top-level store op.
|
||||
Node node(nextNodeId++, &inst);
|
||||
node.stores.push_back(&inst);
|
||||
auto *memref = inst.cast<StoreOp>().getMemRef();
|
||||
Node node(nextNodeId++, &op);
|
||||
node.stores.push_back(&op);
|
||||
auto *memref = op.cast<StoreOp>().getMemRef();
|
||||
memrefAccesses[memref].insert(node.id);
|
||||
nodes.insert({node.id, node});
|
||||
} else if (inst.getNumRegions() != 0) {
|
||||
} else if (op.getNumRegions() != 0) {
|
||||
// Return false if another region is found (not currently supported).
|
||||
return false;
|
||||
} else if (inst.getNumResults() > 0 && !inst.use_empty()) {
|
||||
} else if (op.getNumResults() > 0 && !op.use_empty()) {
|
||||
// Create graph node for top-level producer of SSA values, which
|
||||
// could be used by loop nest nodes.
|
||||
Node node(nextNodeId++, &inst);
|
||||
Node node(nextNodeId++, &op);
|
||||
nodes.insert({node.id, node});
|
||||
}
|
||||
}
|
||||
|
@ -689,7 +689,7 @@ bool MemRefDependenceGraph::init(Function &f) {
|
|||
const Node &node = idAndNode.second;
|
||||
if (!node.loads.empty() || !node.stores.empty())
|
||||
continue;
|
||||
auto *opInst = node.inst;
|
||||
auto *opInst = node.op;
|
||||
for (auto *value : opInst->getResults()) {
|
||||
for (auto &use : value->getUses()) {
|
||||
SmallVector<AffineForOp, 4> loops;
|
||||
|
@ -728,11 +728,11 @@ namespace {
|
|||
// and operation count) for a loop nest up until the innermost loop body.
|
||||
struct LoopNestStats {
|
||||
// Map from AffineForOp to immediate child AffineForOps in its loop body.
|
||||
DenseMap<Instruction *, SmallVector<AffineForOp, 2>> loopMap;
|
||||
DenseMap<Operation *, SmallVector<AffineForOp, 2>> loopMap;
|
||||
// Map from AffineForOp to count of operations in its loop body.
|
||||
DenseMap<Instruction *, uint64_t> opCountMap;
|
||||
DenseMap<Operation *, uint64_t> opCountMap;
|
||||
// Map from AffineForOp to its constant trip count.
|
||||
DenseMap<Instruction *, uint64_t> tripCountMap;
|
||||
DenseMap<Operation *, uint64_t> tripCountMap;
|
||||
};
|
||||
|
||||
// LoopNestStatsCollector walks a single loop nest and gathers per-loop
|
||||
|
@ -743,8 +743,8 @@ struct LoopNestStatsCollector {
|
|||
|
||||
LoopNestStatsCollector(LoopNestStats *stats) : stats(stats) {}
|
||||
|
||||
void collect(Instruction *inst) {
|
||||
inst->walk<AffineForOp>([&](AffineForOp forOp) {
|
||||
void collect(Operation *op) {
|
||||
op->walk<AffineForOp>([&](AffineForOp forOp) {
|
||||
auto *forInst = forOp.getOperation();
|
||||
auto *parentInst = forOp.getOperation()->getParentOp();
|
||||
if (parentInst != nullptr) {
|
||||
|
@ -753,11 +753,11 @@ struct LoopNestStatsCollector {
|
|||
stats->loopMap[parentInst].push_back(forOp);
|
||||
}
|
||||
|
||||
// Record the number of op instructions in the body of 'forOp'.
|
||||
// Record the number of op operations in the body of 'forOp'.
|
||||
unsigned count = 0;
|
||||
stats->opCountMap[forInst] = 0;
|
||||
for (auto &inst : *forOp.getBody()) {
|
||||
if (!inst.isa<AffineForOp>() && !inst.isa<AffineIfOp>())
|
||||
for (auto &op : *forOp.getBody()) {
|
||||
if (!op.isa<AffineForOp>() && !op.isa<AffineIfOp>())
|
||||
++count;
|
||||
}
|
||||
stats->opCountMap[forInst] = count;
|
||||
|
@ -789,9 +789,9 @@ struct LoopNestStatsCollector {
|
|||
// NOTE: this is used to compute the cost of fusing a slice of some loop nest
|
||||
// within another loop.
|
||||
static int64_t getComputeCost(
|
||||
Instruction *forInst, LoopNestStats *stats,
|
||||
llvm::SmallDenseMap<Instruction *, uint64_t, 8> *tripCountOverrideMap,
|
||||
DenseMap<Instruction *, int64_t> *computeCostMap) {
|
||||
Operation *forInst, LoopNestStats *stats,
|
||||
llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountOverrideMap,
|
||||
DenseMap<Operation *, int64_t> *computeCostMap) {
|
||||
// 'opCount' is the total number operations in one iteration of 'forOp' body
|
||||
int64_t opCount = stats->opCountMap[forInst];
|
||||
if (stats->loopMap.count(forInst) > 0) {
|
||||
|
@ -843,8 +843,8 @@ static Optional<uint64_t> getConstDifference(AffineMap lbMap, AffineMap ubMap) {
|
|||
// was encountered).
|
||||
// TODO(andydavis) Make this work with non-unit step loops.
|
||||
static bool buildSliceTripCountMap(
|
||||
Instruction *srcOpInst, ComputationSliceState *sliceState,
|
||||
llvm::SmallDenseMap<Instruction *, uint64_t, 8> *tripCountMap) {
|
||||
Operation *srcOpInst, ComputationSliceState *sliceState,
|
||||
llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
|
||||
SmallVector<AffineForOp, 4> srcLoopIVs;
|
||||
getLoopIVs(*srcOpInst, &srcLoopIVs);
|
||||
unsigned numSrcLoopIVs = srcLoopIVs.size();
|
||||
|
@ -873,12 +873,11 @@ static bool buildSliceTripCountMap(
|
|||
|
||||
// Removes load operations from 'srcLoads' which operate on 'memref', and
|
||||
// adds them to 'dstLoads'.
|
||||
static void
|
||||
moveLoadsAccessingMemrefTo(Value *memref,
|
||||
SmallVectorImpl<Instruction *> *srcLoads,
|
||||
SmallVectorImpl<Instruction *> *dstLoads) {
|
||||
static void moveLoadsAccessingMemrefTo(Value *memref,
|
||||
SmallVectorImpl<Operation *> *srcLoads,
|
||||
SmallVectorImpl<Operation *> *dstLoads) {
|
||||
dstLoads->clear();
|
||||
SmallVector<Instruction *, 4> srcLoadsToKeep;
|
||||
SmallVector<Operation *, 4> srcLoadsToKeep;
|
||||
for (auto *load : *srcLoads) {
|
||||
if (load->cast<LoadOp>().getMemRef() == memref)
|
||||
dstLoads->push_back(load);
|
||||
|
@ -889,7 +888,7 @@ moveLoadsAccessingMemrefTo(Value *memref,
|
|||
}
|
||||
|
||||
// Returns the innermost common loop depth for the set of operations in 'ops'.
|
||||
static unsigned getInnermostCommonLoopDepth(ArrayRef<Instruction *> ops) {
|
||||
static unsigned getInnermostCommonLoopDepth(ArrayRef<Operation *> ops) {
|
||||
unsigned numOps = ops.size();
|
||||
assert(numOps > 0);
|
||||
|
||||
|
@ -917,10 +916,10 @@ static unsigned getInnermostCommonLoopDepth(ArrayRef<Instruction *> ops) {
|
|||
|
||||
// Returns the maximum loop depth at which no dependences between 'loadOpInsts'
|
||||
// and 'storeOpInsts' are satisfied.
|
||||
static unsigned getMaxLoopDepth(ArrayRef<Instruction *> loadOpInsts,
|
||||
ArrayRef<Instruction *> storeOpInsts) {
|
||||
static unsigned getMaxLoopDepth(ArrayRef<Operation *> loadOpInsts,
|
||||
ArrayRef<Operation *> storeOpInsts) {
|
||||
// Merge loads and stores into the same array.
|
||||
SmallVector<Instruction *, 2> ops(loadOpInsts.begin(), loadOpInsts.end());
|
||||
SmallVector<Operation *, 2> ops(loadOpInsts.begin(), loadOpInsts.end());
|
||||
ops.append(storeOpInsts.begin(), storeOpInsts.end());
|
||||
|
||||
// Compute the innermost common loop depth for loads and stores.
|
||||
|
@ -970,7 +969,7 @@ static unsigned getMaxLoopDepth(ArrayRef<Instruction *> loadOpInsts,
|
|||
// dependence componenent lexicographically negative.
|
||||
// TODO(andydavis) Move this function to LoopUtils.
|
||||
static bool
|
||||
computeLoopInterchangePermutation(ArrayRef<Instruction *> ops,
|
||||
computeLoopInterchangePermutation(ArrayRef<Operation *> ops,
|
||||
unsigned maxLoopDepth,
|
||||
SmallVectorImpl<unsigned> *loopPermMap) {
|
||||
// Gather dependence components for dependences between all ops in 'ops'
|
||||
|
@ -1054,12 +1053,12 @@ computeLoopInterchangePermutation(ArrayRef<Instruction *> ops,
|
|||
// This can increase the loop depth at which we can fuse a slice, since we are
|
||||
// pushing loop carried dependence to a greater depth in the loop nest.
|
||||
static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) {
|
||||
assert(node->inst->isa<AffineForOp>());
|
||||
assert(node->op->isa<AffineForOp>());
|
||||
// Get perfectly nested sequence of loops starting at root of loop nest
|
||||
// (the first op being another AffineFor, and the second op - a terminator).
|
||||
// TODO(andydavis,bondhugula) Share this with similar code in loop tiling.
|
||||
SmallVector<AffineForOp, 4> loops;
|
||||
AffineForOp curr = node->inst->cast<AffineForOp>();
|
||||
AffineForOp curr = node->op->cast<AffineForOp>();
|
||||
loops.push_back(curr);
|
||||
auto *currBody = curr.getBody();
|
||||
while (currBody->begin() == std::prev(currBody->end(), 2) &&
|
||||
|
@ -1071,7 +1070,7 @@ static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) {
|
|||
return;
|
||||
|
||||
// Merge loads and stores into the same array.
|
||||
SmallVector<Instruction *, 2> memOps(node->loads.begin(), node->loads.end());
|
||||
SmallVector<Operation *, 2> memOps(node->loads.begin(), node->loads.end());
|
||||
memOps.append(node->stores.begin(), node->stores.end());
|
||||
|
||||
// Compute loop permutation in 'loopPermMap'.
|
||||
|
@ -1091,7 +1090,7 @@ static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) {
|
|||
}
|
||||
}
|
||||
assert(loopNestRootIndex != -1 && "invalid root index");
|
||||
node->inst = loops[loopNestRootIndex].getOperation();
|
||||
node->op = loops[loopNestRootIndex].getOperation();
|
||||
}
|
||||
|
||||
// TODO(mlir-team): improve/complete this when we have target data.
|
||||
|
@ -1114,8 +1113,7 @@ unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
|
|||
// MemRefRegion written to by 'srcStoreOpInst' at depth 'dstLoopDepth'.
|
||||
// TODO(bondhugula): consider refactoring the common code from generateDma and
|
||||
// this one.
|
||||
static Value *createPrivateMemRef(AffineForOp forOp,
|
||||
Instruction *srcStoreOpInst,
|
||||
static Value *createPrivateMemRef(AffineForOp forOp, Operation *srcStoreOpInst,
|
||||
unsigned dstLoopDepth,
|
||||
Optional<unsigned> fastMemorySpace,
|
||||
uint64_t localBufSizeThreshold) {
|
||||
|
@ -1228,7 +1226,7 @@ static Value *createPrivateMemRef(AffineForOp forOp,
|
|||
|
||||
// Does the slice have a single iteration?
|
||||
static uint64_t getSliceIterationCount(
|
||||
const llvm::SmallDenseMap<Instruction *, uint64_t, 8> &sliceTripCountMap) {
|
||||
const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
|
||||
uint64_t iterCount = 1;
|
||||
for (const auto &count : sliceTripCountMap) {
|
||||
iterCount *= count.second;
|
||||
|
@ -1275,7 +1273,7 @@ static bool canFuseSrcWhichWritesToLiveOut(unsigned srcId, unsigned dstId,
|
|||
return false;
|
||||
|
||||
// Compute MemRefRegion 'dstWriteRegion' for 'dstStoreOpInst' on 'memref'.
|
||||
SmallVector<Instruction *, 2> dstStoreOps;
|
||||
SmallVector<Operation *, 2> dstStoreOps;
|
||||
dstNode->getStoreOpsForMemref(memref, &dstStoreOps);
|
||||
assert(dstStoreOps.size() == 1);
|
||||
auto *dstStoreOpInst = dstStoreOps[0];
|
||||
|
@ -1305,8 +1303,8 @@ static bool canFuseSrcWhichWritesToLiveOut(unsigned srcId, unsigned dstId,
|
|||
// and each load op in 'dstLoadOpInsts' at 'dstLoopDepth', and returns
|
||||
// the union in 'sliceState'. Returns true on success, false otherwise.
|
||||
// TODO(andydavis) Move this to a loop fusion utility function.
|
||||
static bool getSliceUnion(Instruction *srcOpInst,
|
||||
ArrayRef<Instruction *> dstLoadOpInsts,
|
||||
static bool getSliceUnion(Operation *srcOpInst,
|
||||
ArrayRef<Operation *> dstLoadOpInsts,
|
||||
unsigned numSrcLoopIVs, unsigned dstLoopDepth,
|
||||
ComputationSliceState *sliceState) {
|
||||
MemRefAccess srcAccess(srcOpInst);
|
||||
|
@ -1415,10 +1413,9 @@ static bool getSliceUnion(Instruction *srcOpInst,
|
|||
// *) Compares the total cost of the unfused loop nests to the min cost fused
|
||||
// loop nest computed in the previous step, and returns true if the latter
|
||||
// is lower.
|
||||
static bool isFusionProfitable(Instruction *srcOpInst,
|
||||
Instruction *srcStoreOpInst,
|
||||
ArrayRef<Instruction *> dstLoadOpInsts,
|
||||
ArrayRef<Instruction *> dstStoreOpInsts,
|
||||
static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
|
||||
ArrayRef<Operation *> dstLoadOpInsts,
|
||||
ArrayRef<Operation *> dstStoreOpInsts,
|
||||
ComputationSliceState *sliceState,
|
||||
unsigned *dstLoopDepth, bool maximalFusion) {
|
||||
LLVM_DEBUG({
|
||||
|
@ -1492,7 +1489,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
|
|||
MemRefRegion srcWriteRegion(srcStoreOpInst->getLoc());
|
||||
if (failed(srcWriteRegion.compute(srcStoreOpInst, /*loopDepth=*/0))) {
|
||||
LLVM_DEBUG(llvm::dbgs()
|
||||
<< "Unable to compute MemRefRegion for source instruction\n.");
|
||||
<< "Unable to compute MemRefRegion for source operation\n.");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1510,8 +1507,8 @@ static bool isFusionProfitable(Instruction *srcOpInst,
|
|||
|
||||
// Evaluate all depth choices for materializing the slice in the destination
|
||||
// loop nest.
|
||||
llvm::SmallDenseMap<Instruction *, uint64_t, 8> sliceTripCountMap;
|
||||
DenseMap<Instruction *, int64_t> computeCostMap;
|
||||
llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
|
||||
DenseMap<Operation *, int64_t> computeCostMap;
|
||||
for (unsigned i = maxDstLoopDepth; i >= 1; --i) {
|
||||
// Compute the union of slice bounds of all ops in 'dstLoadOpInsts'.
|
||||
if (!getSliceUnion(srcOpInst, dstLoadOpInsts, numSrcLoopIVs, i,
|
||||
|
@ -1754,7 +1751,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
|
|||
// bounds to be functions of 'dstLoopNest' IVs and symbols.
|
||||
// *) Fuse the 'srcLoopNest' computation slice into the 'dstLoopNest',
|
||||
// at a loop depth determined by the cost model in 'isFusionProfitable'.
|
||||
// *) Add the newly fused load/store operation instructions to the state,
|
||||
// *) Add the newly fused load/store operations to the state,
|
||||
// and also add newly fuse load ops to 'dstLoopOps' to be considered
|
||||
// as fusion dst load ops in another iteration.
|
||||
// *) Remove old src loop nest and its associated state.
|
||||
|
@ -1773,7 +1770,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
|
|||
// is preserved in the fused loop nest.
|
||||
// *) Update graph state to reflect the fusion of 'sibNode' into 'dstNode'.
|
||||
//
|
||||
// Given a graph where top-level instructions are vertices in the set 'V' and
|
||||
// Given a graph where top-level operations are vertices in the set 'V' and
|
||||
// edges in the set 'E' are dependences between vertices, this algorithm
|
||||
// takes O(V) time for initialization, and has runtime O(V + E).
|
||||
//
|
||||
|
@ -1844,7 +1841,7 @@ public:
|
|||
// Get 'dstNode' into which to attempt fusion.
|
||||
auto *dstNode = mdg->getNode(dstId);
|
||||
// Skip if 'dstNode' is not a loop nest.
|
||||
if (!dstNode->inst->isa<AffineForOp>())
|
||||
if (!dstNode->op->isa<AffineForOp>())
|
||||
continue;
|
||||
// Sink sequential loops in 'dstNode' (and thus raise parallel loops)
|
||||
// while preserving relative order. This can increase the maximum loop
|
||||
|
@ -1852,8 +1849,8 @@ public:
|
|||
// consumer loop nest.
|
||||
sinkSequentialLoops(dstNode);
|
||||
|
||||
SmallVector<Instruction *, 4> loads = dstNode->loads;
|
||||
SmallVector<Instruction *, 4> dstLoadOpInsts;
|
||||
SmallVector<Operation *, 4> loads = dstNode->loads;
|
||||
SmallVector<Operation *, 4> dstLoadOpInsts;
|
||||
DenseSet<Value *> visitedMemrefs;
|
||||
while (!loads.empty()) {
|
||||
// Get memref of load on top of the stack.
|
||||
|
@ -1882,7 +1879,7 @@ public:
|
|||
// Get 'srcNode' from which to attempt fusion into 'dstNode'.
|
||||
auto *srcNode = mdg->getNode(srcId);
|
||||
// Skip if 'srcNode' is not a loop nest.
|
||||
if (!srcNode->inst->isa<AffineForOp>())
|
||||
if (!srcNode->op->isa<AffineForOp>())
|
||||
continue;
|
||||
// Skip if 'srcNode' has more than one store to any memref.
|
||||
// TODO(andydavis) Support fusing multi-output src loop nests.
|
||||
|
@ -1908,9 +1905,9 @@ public:
|
|||
if (mdg->getOutEdgeCount(srcNode->id, memref) > maxSrcUserCount)
|
||||
continue;
|
||||
|
||||
// Compute an instruction list insertion point for the fused loop
|
||||
// Compute an operation list insertion point for the fused loop
|
||||
// nest which preserves dependences.
|
||||
Instruction *insertPointInst =
|
||||
Operation *insertPointInst =
|
||||
mdg->getFusedLoopNestInsertionPoint(srcNode->id, dstNode->id);
|
||||
if (insertPointInst == nullptr)
|
||||
continue;
|
||||
|
@ -1918,7 +1915,7 @@ public:
|
|||
// Get unique 'srcNode' store op.
|
||||
auto *srcStoreOpInst = srcNode->stores.front();
|
||||
// Gather 'dstNode' store ops to 'memref'.
|
||||
SmallVector<Instruction *, 2> dstStoreOpInsts;
|
||||
SmallVector<Operation *, 2> dstStoreOpInsts;
|
||||
for (auto *storeOpInst : dstNode->stores)
|
||||
if (storeOpInst->cast<StoreOp>().getMemRef() == memref)
|
||||
dstStoreOpInsts.push_back(storeOpInst);
|
||||
|
@ -1938,7 +1935,7 @@ public:
|
|||
LLVM_DEBUG(llvm::dbgs() << "\tslice loop nest:\n"
|
||||
<< *sliceLoopNest.getOperation() << "\n");
|
||||
// Move 'dstAffineForOp' before 'insertPointInst' if needed.
|
||||
auto dstAffineForOp = dstNode->inst->cast<AffineForOp>();
|
||||
auto dstAffineForOp = dstNode->op->cast<AffineForOp>();
|
||||
if (insertPointInst != dstAffineForOp.getOperation()) {
|
||||
dstAffineForOp.getOperation()->moveBefore(insertPointInst);
|
||||
}
|
||||
|
@ -1954,7 +1951,7 @@ public:
|
|||
}
|
||||
if (!writesToLiveInOrOut) {
|
||||
// Create private memref for 'memref' in 'dstAffineForOp'.
|
||||
SmallVector<Instruction *, 4> storesForMemref;
|
||||
SmallVector<Operation *, 4> storesForMemref;
|
||||
for (auto *storeOpInst : sliceCollector.storeOpInsts) {
|
||||
if (storeOpInst->cast<StoreOp>().getMemRef() == memref)
|
||||
storesForMemref.push_back(storeOpInst);
|
||||
|
@ -1995,7 +1992,7 @@ public:
|
|||
// so it is safe to remove.
|
||||
if (writesToLiveInOrOut || mdg->canRemoveNode(srcNode->id)) {
|
||||
mdg->removeNode(srcNode->id);
|
||||
srcNode->inst->erase();
|
||||
srcNode->op->erase();
|
||||
} else {
|
||||
// Add remaining users of 'oldMemRef' back on the worklist (if not
|
||||
// already there), as its replacement with a local/private memref
|
||||
|
@ -2034,7 +2031,7 @@ public:
|
|||
// Get 'dstNode' into which to attempt fusion.
|
||||
auto *dstNode = mdg->getNode(dstId);
|
||||
// Skip if 'dstNode' is not a loop nest.
|
||||
if (!dstNode->inst->isa<AffineForOp>())
|
||||
if (!dstNode->op->isa<AffineForOp>())
|
||||
continue;
|
||||
// Attempt to fuse 'dstNode' with its sibling nodes in the graph.
|
||||
fuseWithSiblingNodes(dstNode);
|
||||
|
@ -2051,11 +2048,11 @@ public:
|
|||
// TODO(andydavis) Check that 'sibStoreOpInst' post-dominates all other
|
||||
// stores to the same memref in 'sibNode' loop nest.
|
||||
auto *sibNode = mdg->getNode(sibId);
|
||||
// Compute an instruction list insertion point for the fused loop
|
||||
// Compute an operation list insertion point for the fused loop
|
||||
// nest which preserves dependences.
|
||||
assert(sibNode->inst->getBlock() == dstNode->inst->getBlock());
|
||||
Instruction *insertPointInst =
|
||||
sibNode->inst->isBeforeInBlock(dstNode->inst)
|
||||
assert(sibNode->op->getBlock() == dstNode->op->getBlock());
|
||||
Operation *insertPointInst =
|
||||
sibNode->op->isBeforeInBlock(dstNode->op)
|
||||
? mdg->getFusedLoopNestInsertionPoint(sibNode->id, dstNode->id)
|
||||
: mdg->getFusedLoopNestInsertionPoint(dstNode->id, sibNode->id);
|
||||
if (insertPointInst == nullptr)
|
||||
|
@ -2064,21 +2061,21 @@ public:
|
|||
// Check if fusion would be profitable and at what depth.
|
||||
|
||||
// Get unique 'sibNode' load op to 'memref'.
|
||||
SmallVector<Instruction *, 2> sibLoadOpInsts;
|
||||
SmallVector<Operation *, 2> sibLoadOpInsts;
|
||||
sibNode->getLoadOpsForMemref(memref, &sibLoadOpInsts);
|
||||
// Currently findSiblingNodeToFuse searches for siblings with one load.
|
||||
assert(sibLoadOpInsts.size() == 1);
|
||||
Instruction *sibLoadOpInst = sibLoadOpInsts[0];
|
||||
Operation *sibLoadOpInst = sibLoadOpInsts[0];
|
||||
assert(!sibNode->stores.empty());
|
||||
// TODO(andydavis) Choose the store which postdominates all other stores.
|
||||
auto *sibStoreOpInst = sibNode->stores.back();
|
||||
|
||||
// Gather 'dstNode' load ops to 'memref'.
|
||||
SmallVector<Instruction *, 2> dstLoadOpInsts;
|
||||
SmallVector<Operation *, 2> dstLoadOpInsts;
|
||||
dstNode->getLoadOpsForMemref(memref, &dstLoadOpInsts);
|
||||
|
||||
// Gather 'dstNode' store ops to 'memref'.
|
||||
SmallVector<Instruction *, 2> dstStoreOpInsts;
|
||||
SmallVector<Operation *, 2> dstStoreOpInsts;
|
||||
dstNode->getStoreOpsForMemref(memref, &dstStoreOpInsts);
|
||||
|
||||
unsigned bestDstLoopDepth;
|
||||
|
@ -2094,8 +2091,8 @@ public:
|
|||
auto sliceLoopNest = mlir::insertBackwardComputationSlice(
|
||||
sibLoadOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState);
|
||||
if (sliceLoopNest != nullptr) {
|
||||
auto dstForInst = dstNode->inst->cast<AffineForOp>();
|
||||
// Update instruction position of fused loop nest (if needed).
|
||||
auto dstForInst = dstNode->op->cast<AffineForOp>();
|
||||
// Update operation position of fused loop nest (if needed).
|
||||
if (insertPointInst != dstForInst.getOperation()) {
|
||||
dstForInst.getOperation()->moveBefore(insertPointInst);
|
||||
}
|
||||
|
@ -2140,7 +2137,7 @@ public:
|
|||
if (outEdge.id == dstNode->id || outEdge.value != inEdge.value)
|
||||
return;
|
||||
auto *sibNode = mdg->getNode(sibNodeId);
|
||||
if (!sibNode->inst->isa<AffineForOp>())
|
||||
if (!sibNode->op->isa<AffineForOp>())
|
||||
return;
|
||||
// Skip if 'outEdge' is not a read-after-write dependence.
|
||||
// TODO(andydavis) Remove the single load op restriction.
|
||||
|
@ -2196,7 +2193,7 @@ public:
|
|||
}
|
||||
|
||||
// Collect dst loop stats after memref privatization transformation.
|
||||
auto dstForInst = dstNode->inst->cast<AffineForOp>();
|
||||
auto dstForInst = dstNode->op->cast<AffineForOp>();
|
||||
LoopNestStateCollector dstLoopCollector;
|
||||
dstLoopCollector.collect(dstForInst.getOperation());
|
||||
// Clear and add back loads and stores
|
||||
|
@ -2208,7 +2205,7 @@ public:
|
|||
// function.
|
||||
if (mdg->getOutEdgeCount(sibNode->id) == 0) {
|
||||
mdg->removeNode(sibNode->id);
|
||||
sibNode->inst->cast<AffineForOp>().erase();
|
||||
sibNode->op->cast<AffineForOp>().erase();
|
||||
}
|
||||
}
@ -2218,13 +2215,13 @@ public:
|
|||
if (pair.second > 0)
|
||||
continue;
|
||||
auto *memref = pair.first;
|
||||
// Skip if there exist other uses (return instruction or function calls).
|
||||
// Skip if there exist other uses (return operation or function calls).
|
||||
if (!memref->use_empty())
|
||||
continue;
|
||||
// Use list expected to match the dep graph info.
|
||||
auto *inst = memref->getDefiningOp();
|
||||
if (inst && inst->isa<AllocOp>())
|
||||
inst->erase();
|
||||
auto *op = memref->getDefiningOp();
|
||||
if (op && op->isa<AllocOp>())
|
||||
op->erase();
|
||||
}
|
||||
}
|
||||
};
@ -180,7 +180,7 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
|
|||
assert(!band.empty());
|
||||
assert(band.size() == tileSizes.size() && "Incorrect number of tile sizes");
|
||||
|
||||
// Check if the supplied for inst's are all successively nested.
|
||||
// Check if the supplied for op's are all successively nested.
|
||||
for (unsigned i = 1, e = band.size(); i < e; i++) {
|
||||
assert(band[i].getOperation()->getParentOp() == band[i - 1].getOperation());
|
||||
}
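For readers new to tiling, a minimal plain-C++ sketch of what tiling a perfectly nested 2-d band with 32x32 tiles produces (illustrative only; the function, bounds, and array names are invented and are not the MLIR utilities used in this pass):

#include <algorithm>
#include <vector>

// Illustrative sketch only: tiling a perfect 2-d loop nest with tile sizes
// 32x32. Assumes dst and src are at least n x m. tileCodeGen performs the
// analogous rewrite on an affine.for band, with the same requirement that
// the loops form a perfect nest.
void tiledCopy(std::vector<std::vector<int>> &dst,
               const std::vector<std::vector<int>> &src, int n, int m) {
  const int tile = 32;
  for (int ii = 0; ii < n; ii += tile)                    // inter-tile loop
    for (int jj = 0; jj < m; jj += tile)                  // inter-tile loop
      for (int i = ii; i < std::min(ii + tile, n); ++i)   // intra-tile loop
        for (int j = jj; j < std::min(jj + tile, m); ++j) // intra-tile loop
          dst[i][j] = src[i][j];
}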
|
||||
|
@ -269,8 +269,8 @@ static void getTileableBands(Function &f,
|
|||
};
|
||||
|
||||
for (auto &block : f)
|
||||
for (auto &inst : block)
|
||||
if (auto forOp = inst.dyn_cast<AffineForOp>())
|
||||
for (auto &op : block)
|
||||
if (auto forOp = op.dyn_cast<AffineForOp>())
|
||||
getMaximalPerfectLoopNest(forOp);
|
||||
}
@ -79,7 +79,7 @@ struct LoopUnroll : public FunctionPass<LoopUnroll> {
|
|||
|
||||
void runOnFunction() override;
|
||||
|
||||
/// Unroll this for inst. Returns failure if nothing was done.
|
||||
/// Unroll this for op. Returns failure if nothing was done.
|
||||
LogicalResult runOnAffineForOp(AffineForOp forOp);
|
||||
|
||||
static const unsigned kDefaultUnrollFactor = 4;
|
||||
|
@ -106,7 +106,7 @@ void LoopUnroll::runOnFunction() {
|
|||
hasInnerLoops |= walkPostOrder(&(*Start++));
|
||||
return hasInnerLoops;
|
||||
}
|
||||
bool walkPostOrder(Instruction *opInst) {
|
||||
bool walkPostOrder(Operation *opInst) {
|
||||
bool hasInnerLoops = false;
|
||||
for (auto ®ion : opInst->getRegions())
|
||||
for (auto &block : region)
|
||||
|
@ -158,7 +158,7 @@ void LoopUnroll::runOnFunction() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Unrolls a 'affine.for' inst. Returns success if the loop was unrolled,
|
||||
/// Unrolls a 'affine.for' op. Returns success if the loop was unrolled,
|
||||
/// failure otherwise. The default unroll factor is 4.
|
||||
LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
|
||||
// Use the function callback if one was provided.
@ -17,7 +17,7 @@
|
|||
//
|
||||
// This file implements loop unroll and jam. Unroll and jam is a transformation
|
||||
// that improves locality, in particular, register reuse, while also improving
|
||||
// instruction level parallelism. The example below shows what it does in nearly
|
||||
// operation level parallelism. The example below shows what it does in nearly
|
||||
// the general case. Loop unroll and jam currently works if the bounds of the
|
||||
// loops inner to the loop being unroll-jammed do not depend on the latter.
|
||||
//
|
||||
|
@ -39,7 +39,7 @@
|
|||
// S6(i+1);
|
||||
//
|
||||
// Note: 'if/else' blocks are not jammed. So, if there are loops inside if
|
||||
// inst's, bodies of those loops will not be jammed.
|
||||
// op's, bodies of those loops will not be jammed.
|
||||
//===----------------------------------------------------------------------===//
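The header comment's S1..S6 example is only partially visible in these hunks. As a rough plain-C++ illustration (not MLIR; the bounds, array, and factor are invented, and the trip count is assumed to be a multiple of the unroll-and-jam factor), unroll-and-jam by 2 looks like this:

#include <cstdio>

// Illustrative sketch only: unroll-and-jam of the outer loop by a factor of 2.
// Assumes the outer trip count (8) is a multiple of the factor and that the
// inner loop bound does not depend on the outer induction variable.
int main() {
  int A[8][4] = {};
  // Original nest:
  //   for (int i = 0; i < 8; ++i)
  //     for (int j = 0; j < 4; ++j)
  //       A[i][j] += i * j;
  // After unroll-and-jam by 2: the two copies of the body are "jammed" into a
  // single inner loop, improving reuse of values indexed by j.
  for (int i = 0; i < 8; i += 2)
    for (int j = 0; j < 4; ++j) {
      A[i][j] += i * j;
      A[i + 1][j] += (i + 1) * j;
    }
  std::printf("%d\n", A[7][3]);
  return 0;
}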
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
|
||||
|
@ -96,7 +96,7 @@ void LoopUnrollAndJam::runOnFunction() {
|
|||
runOnAffineForOp(forOp);
|
||||
}
|
||||
|
||||
/// Unroll and jam a 'affine.for' inst. Default unroll jam factor is
|
||||
/// Unroll and jam a 'affine.for' op. Default unroll jam factor is
|
||||
/// kDefaultUnrollJamFactor. Return failure if nothing was done.
|
||||
LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
|
||||
// Unroll and jam by the factor that was passed if any.
|
||||
|
@ -123,16 +123,16 @@ LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
|
|||
/// Unrolls and jams this loop by the specified factor.
|
||||
LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
|
||||
uint64_t unrollJamFactor) {
|
||||
// Gathers all maximal sub-blocks of instructions that do not themselves
|
||||
// include a for inst (a instruction could have a descendant for inst though
|
||||
// Gathers all maximal sub-blocks of operations that do not themselves
|
||||
// include a for op (an operation could have a descendant for op though
|
||||
// in its tree). Ignore the block terminators.
|
||||
struct JamBlockGatherer {
|
||||
// Store iterators to the first and last inst of each sub-block found.
|
||||
// Store iterators to the first and last op of each sub-block found.
|
||||
std::vector<std::pair<Block::iterator, Block::iterator>> subBlocks;
|
||||
|
||||
// This is a linear time walk.
|
||||
void walk(Instruction *inst) {
|
||||
for (auto ®ion : inst->getRegions())
|
||||
void walk(Operation *op) {
|
||||
for (auto ®ion : op->getRegions())
|
||||
for (auto &block : region)
|
||||
walk(block);
|
||||
}
@ -32,7 +32,7 @@
|
|||
using namespace mlir;
|
||||
|
||||
namespace {
|
||||
// Visit affine expressions recursively and build the sequence of instructions
|
||||
// Visit affine expressions recursively and build the sequence of operations
|
||||
// that correspond to it. Visitation functions return a Value of the
|
||||
// expression subtree they visited or `nullptr` on error.
|
||||
class AffineApplyExpander
|
||||
|
@ -102,7 +102,7 @@ public:
|
|||
// Floor division operation (rounds towards negative infinity).
|
||||
//
|
||||
// For positive divisors, it can be implemented without branching and with a
|
||||
// single division instruction as
|
||||
// single division operation as
|
||||
//
|
||||
// a floordiv b =
|
||||
// let negative = a < 0 in
|
||||
|
@ -144,7 +144,7 @@ public:
|
|||
// Ceiling division operation (rounds towards positive infinity).
|
||||
//
|
||||
// For positive divisors, it can be implemented without branching and with a
|
||||
// single division instruction as
|
||||
// single division operation as
|
||||
//
|
||||
// a ceildiv b =
|
||||
// let negative = a <= 0 in
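The hunks above show only the first line of each formula. As a hedged reconstruction, a branch-free lowering for a positive divisor can be written in plain C++ as follows; this is a sketch consistent with the `negative = a < 0` / `negative = a <= 0` conditions shown, not a verbatim copy of the elided MLIR formulas:

#include <cassert>
#include <cstdio>

// Sketch: floor/ceil division for a positive divisor with one machine
// division plus selects, in the spirit of the lowering comments above.
int floordiv(int a, int b) {
  assert(b > 0);
  bool negative = a < 0;
  int absolute = negative ? -a - 1 : a;
  int quotient = absolute / b;
  return negative ? -quotient - 1 : quotient;
}

int ceildiv(int a, int b) {
  assert(b > 0);
  bool negative = a <= 0;
  int absolute = negative ? -a : a - 1;
  int quotient = absolute / b;
  return negative ? -quotient : quotient + 1;
}

int main() {
  std::printf("%d %d\n", floordiv(-7, 2), ceildiv(7, 2)); // prints: -4 4
  return 0;
}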
|
||||
|
@ -213,7 +213,7 @@ private:
|
|||
};
|
||||
} // namespace
|
||||
|
||||
// Create a sequence of instructions that implement the `expr` applied to the
|
||||
// Create a sequence of operations that implement the `expr` applied to the
|
||||
// given dimension and symbol values.
|
||||
static mlir::Value *expandAffineExpr(FuncBuilder *builder, Location loc,
|
||||
AffineExpr expr,
|
||||
|
@ -222,7 +222,7 @@ static mlir::Value *expandAffineExpr(FuncBuilder *builder, Location loc,
|
|||
return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr);
|
||||
}
|
||||
|
||||
// Create a sequence of instructions that implement the `affineMap` applied to
|
||||
// Create a sequence of operations that implement the `affineMap` applied to
|
||||
// the given `operands` (as if it were an AffineApplyOp).
|
||||
Optional<SmallVector<Value *, 8>> static expandAffineMap(
|
||||
FuncBuilder *builder, Location loc, AffineMap affineMap,
|
||||
|
@ -395,16 +395,16 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Convert an "if" instruction into a flow of basic blocks.
|
||||
// Convert an "if" operation into a flow of basic blocks.
|
||||
//
|
||||
// Create an SESE region for the if instruction (including its "then" and
|
||||
// optional "else" instruction blocks) and append it to the end of the current
|
||||
// Create an SESE region for the if operation (including its "then" and
|
||||
// optional "else" operation blocks) and append it to the end of the current
|
||||
// region. The conditional region consists of a sequence of condition-checking
|
||||
// blocks that implement the short-circuit scheme, followed by a "then" SESE
|
||||
// region and an "else" SESE region, and the continuation block that
|
||||
// post-dominates all blocks of the "if" instruction. The flow of blocks that
|
||||
// post-dominates all blocks of the "if" operation. The flow of blocks that
|
||||
// correspond to the "then" and "else" clauses are constructed recursively,
|
||||
// enabling easy nesting of "if" instructions and if-then-else-if chains.
|
||||
// enabling easy nesting of "if" operations and if-then-else-if chains.
|
||||
//
|
||||
// +--------------------------------+
|
||||
// | <code before the AffineIfOp> |
|
||||
|
@ -465,12 +465,12 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
|
|||
auto *continueBlock = condBlock->splitBlock(ifInst);
|
||||
|
||||
// Create a block for the 'then' code, inserting it between the cond and
|
||||
// continue blocks. Move the instructions over from the AffineIfOp and add a
|
||||
// continue blocks. Move the operations over from the AffineIfOp and add a
|
||||
// branch to the continuation point.
|
||||
Block *thenBlock = new Block();
|
||||
thenBlock->insertBefore(continueBlock);
|
||||
|
||||
// If the 'then' block is not empty, then splice the instructions except for
|
||||
// If the 'then' block is not empty, then splice the operations except for
|
||||
// the terminator.
|
||||
auto &oldThenBlocks = ifOp.getThenBlocks();
|
||||
if (!oldThenBlocks.empty()) {
|
||||
|
@ -570,7 +570,7 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
|
|||
}
|
||||
|
||||
// Convert an "affine.apply" operation into a sequence of arithmetic
|
||||
// instructions using the StandardOps dialect. Return true on error.
|
||||
// operations using the StandardOps dialect. Return true on error.
|
||||
bool LowerAffinePass::lowerAffineApply(AffineApplyOp op) {
|
||||
FuncBuilder builder(op.getOperation());
|
||||
auto maybeExpandedMap =
|
||||
|
@ -590,12 +590,12 @@ bool LowerAffinePass::lowerAffineApply(AffineApplyOp op) {
|
|||
|
||||
// Entry point of the function convertor.
|
||||
//
|
||||
// Conversion is performed by recursively visiting instructions of a Function.
|
||||
// Conversion is performed by recursively visiting operations of a Function.
|
||||
// It reasons in terms of single-entry single-exit (SESE) regions that are not
|
||||
// materialized in the code. Instead, the pointer to the last block of the
|
||||
// region is maintained throughout the conversion as the insertion point of the
|
||||
// IR builder since we never change the first block after its creation. "Block"
|
||||
// instructions such as loops and branches create new SESE regions for their
|
||||
// operations such as loops and branches create new SESE regions for their
|
||||
// bodies, and surround them with additional basic blocks for the control flow.
|
||||
// Individual operations are simply appended to the end of the last basic block
|
||||
// of the current region. The SESE invariant allows us to easily handle nested
|
||||
|
@ -607,32 +607,32 @@ bool LowerAffinePass::lowerAffineApply(AffineApplyOp op) {
|
|||
// corresponding Value that has been defined previously. The value flow
|
||||
// starts with function arguments converted to basic block arguments.
|
||||
void LowerAffinePass::runOnFunction() {
|
||||
SmallVector<Instruction *, 8> instsToRewrite;
|
||||
SmallVector<Operation *, 8> instsToRewrite;
|
||||
|
||||
// Collect all the For instructions as well as AffineIfOps and AffineApplyOps.
|
||||
// Collect all the For operations as well as AffineIfOps and AffineApplyOps.
|
||||
// We do this as a prepass to avoid invalidating the walker with our rewrite.
|
||||
getFunction().walk([&](Instruction *inst) {
|
||||
if (inst->isa<AffineApplyOp>() || inst->isa<AffineForOp>() ||
|
||||
inst->isa<AffineIfOp>())
|
||||
instsToRewrite.push_back(inst);
|
||||
getFunction().walk([&](Operation *op) {
|
||||
if (op->isa<AffineApplyOp>() || op->isa<AffineForOp>() ||
|
||||
op->isa<AffineIfOp>())
|
||||
instsToRewrite.push_back(op);
|
||||
});
|
||||
|
||||
// Rewrite all of the ifs and fors. We walked the instructions in preorder,
|
||||
// Rewrite all of the ifs and fors. We walked the operations in preorder,
|
||||
// so we know that we will rewrite them in the same order.
|
||||
for (auto *inst : instsToRewrite) {
|
||||
if (auto ifOp = inst->dyn_cast<AffineIfOp>()) {
|
||||
for (auto *op : instsToRewrite) {
|
||||
if (auto ifOp = op->dyn_cast<AffineIfOp>()) {
|
||||
if (lowerAffineIf(ifOp))
|
||||
return signalPassFailure();
|
||||
} else if (auto forOp = inst->dyn_cast<AffineForOp>()) {
|
||||
} else if (auto forOp = op->dyn_cast<AffineForOp>()) {
|
||||
if (lowerAffineFor(forOp))
|
||||
return signalPassFailure();
|
||||
} else if (lowerAffineApply(inst->cast<AffineApplyOp>())) {
|
||||
} else if (lowerAffineApply(op->cast<AffineApplyOp>())) {
|
||||
return signalPassFailure();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lowers If and For instructions within a function into their lower level CFG
|
||||
/// Lowers If and For operations within a function into their lower level CFG
|
||||
/// equivalent blocks.
|
||||
FunctionPassBase *mlir::createLowerAffinePass() {
|
||||
return new LowerAffinePass();
|
||||
|
@ -640,4 +640,4 @@ FunctionPassBase *mlir::createLowerAffinePass() {
|
|||
|
||||
static PassRegistration<LowerAffinePass>
|
||||
pass("lower-affine",
|
||||
"Lower If, For, AffineApply instructions to primitive equivalents");
|
||||
"Lower If, For, AffineApply operations to primitive equivalents");
@ -356,12 +356,12 @@ public:
|
|||
explicit VectorTransferExpander(MLIRContext *context)
|
||||
: MLLoweringPattern(VectorTransferOpTy::getOperationName(), 1, context) {}
|
||||
|
||||
PatternMatchResult match(Instruction *op) const override {
|
||||
PatternMatchResult match(Operation *op) const override {
|
||||
if (m_Op<VectorTransferOpTy>().match(op))
|
||||
return matchSuccess();
|
||||
return matchFailure();
|
||||
}
|
||||
void rewriteOpInst(Instruction *op, MLFuncGlobalLoweringState *funcWiseState,
|
||||
void rewriteOpInst(Operation *op, MLFuncGlobalLoweringState *funcWiseState,
|
||||
std::unique_ptr<PatternState> opState,
|
||||
MLFuncLoweringRewriter *rewriter) const override {
|
||||
VectorTransferRewriter<VectorTransferOpTy>(
@ -55,7 +55,7 @@
|
|||
/// to the pass. This pass is thus a partial lowering that opens the "greybox"
|
||||
/// that is the super-vector abstraction. In particular, this pass can turn the
|
||||
/// vector_transfer_read and vector_transfer_write ops in either:
|
||||
/// 1. a loop nest with either scalar and vector load/store instructions; or
|
||||
/// 1. a loop nest with either scalar or vector load/store operations; or
|
||||
/// 2. a loop-nest with DmaStartOp / DmaWaitOp; or
|
||||
/// 3. a pre-existing blackbox library call that can be written manually or
|
||||
/// synthesized using search and superoptimization.
|
||||
|
@ -239,7 +239,7 @@ static SmallVector<unsigned, 8> delinearize(unsigned linearIndex,
|
|||
return res;
|
||||
}
|
||||
|
||||
static Instruction *instantiate(FuncBuilder *b, Instruction *opInst,
|
||||
static Operation *instantiate(FuncBuilder *b, Operation *opInst,
|
||||
VectorType hwVectorType,
|
||||
DenseMap<Value *, Value *> *substitutionsMap);
@ -259,9 +259,8 @@ static Value *substitute(Value *v, VectorType hwVectorType,
|
|||
auto *opInst = v->getDefiningOp();
|
||||
if (opInst->isa<ConstantOp>()) {
|
||||
FuncBuilder b(opInst);
|
||||
auto *inst = instantiate(&b, opInst, hwVectorType, substitutionsMap);
|
||||
auto res =
|
||||
substitutionsMap->insert(std::make_pair(v, inst->getResult(0)));
|
||||
auto *op = instantiate(&b, opInst, hwVectorType, substitutionsMap);
|
||||
auto res = substitutionsMap->insert(std::make_pair(v, op->getResult(0)));
|
||||
assert(res.second && "Insertion failed");
|
||||
return res.first->second;
|
||||
}
|
||||
|
@ -384,7 +383,7 @@ reindexAffineIndices(FuncBuilder *b, VectorType hwVectorType,
|
|||
/// - constant splat is replaced by constant splat of `hwVectorType`.
|
||||
/// TODO(ntv): add more substitutions on a per-need basis.
|
||||
static SmallVector<NamedAttribute, 1>
|
||||
materializeAttributes(Instruction *opInst, VectorType hwVectorType) {
|
||||
materializeAttributes(Operation *opInst, VectorType hwVectorType) {
|
||||
SmallVector<NamedAttribute, 1> res;
|
||||
for (auto a : opInst->getAttrs()) {
|
||||
if (auto splat = a.second.dyn_cast<SplatElementsAttr>()) {
|
||||
|
@ -404,7 +403,7 @@ materializeAttributes(Instruction *opInst, VectorType hwVectorType) {
|
|||
/// substitutionsMap.
|
||||
///
|
||||
/// If the underlying substitution fails, this fails too and returns nullptr.
|
||||
static Instruction *instantiate(FuncBuilder *b, Instruction *opInst,
|
||||
static Operation *instantiate(FuncBuilder *b, Operation *opInst,
|
||||
VectorType hwVectorType,
|
||||
DenseMap<Value *, Value *> *substitutionsMap) {
|
||||
assert(!opInst->isa<VectorTransferReadOp>() &&
|
||||
|
@ -481,7 +480,7 @@ static AffineMap projectedPermutationMap(VectorTransferOpTy transfer,
|
|||
/// `hwVectorType` in the covering of the super-vector type. For a more
|
||||
/// detailed description of the problem, see the description of
|
||||
/// reindexAffineIndices.
|
||||
static Instruction *instantiate(FuncBuilder *b, VectorTransferReadOp read,
|
||||
static Operation *instantiate(FuncBuilder *b, VectorTransferReadOp read,
|
||||
VectorType hwVectorType,
|
||||
ArrayRef<unsigned> hwVectorInstance,
|
||||
DenseMap<Value *, Value *> *substitutionsMap) {
|
||||
|
@ -505,7 +504,7 @@ static Instruction *instantiate(FuncBuilder *b, VectorTransferReadOp read,
|
|||
/// `hwVectorType` in the covering of the super-vector type. For a more
|
||||
/// detailed description of the problem, see the description of
|
||||
/// reindexAffineIndices.
|
||||
static Instruction *instantiate(FuncBuilder *b, VectorTransferWriteOp write,
|
||||
static Operation *instantiate(FuncBuilder *b, VectorTransferWriteOp write,
|
||||
VectorType hwVectorType,
|
||||
ArrayRef<unsigned> hwVectorInstance,
|
||||
DenseMap<Value *, Value *> *substitutionsMap) {
|
||||
|
@ -521,11 +520,11 @@ static Instruction *instantiate(FuncBuilder *b, VectorTransferWriteOp write,
|
|||
return cloned.getOperation();
|
||||
}
|
||||
|
||||
/// Returns `true` if inst instance is properly cloned and inserted, false
|
||||
/// Returns `true` if op instance is properly cloned and inserted, false
|
||||
/// otherwise.
|
||||
/// The multi-dimensional `hwVectorInstance` belongs to the shapeRatio of
|
||||
/// super-vector type to hw vector type.
|
||||
/// A cloned instance of `inst` is formed as follows:
|
||||
/// A cloned instance of `op` is formed as follows:
|
||||
/// 1. vector_transfer_read: the return `superVectorType` is replaced by
|
||||
/// `hwVectorType`. Additionally, affine indices are reindexed with
|
||||
/// `reindexAffineIndices` using `hwVectorInstance` and vector type
|
||||
|
@ -542,26 +541,26 @@ static Instruction *instantiate(FuncBuilder *b, VectorTransferWriteOp write,
|
|||
/// possible.
|
||||
///
|
||||
/// Returns true on failure.
|
||||
static bool instantiateMaterialization(Instruction *inst,
|
||||
static bool instantiateMaterialization(Operation *op,
|
||||
MaterializationState *state) {
|
||||
LLVM_DEBUG(dbgs() << "\ninstantiate: " << *inst);
|
||||
LLVM_DEBUG(dbgs() << "\ninstantiate: " << *op);
|
||||
|
||||
// Create a builder here for unroll-and-jam effects.
|
||||
FuncBuilder b(inst);
|
||||
FuncBuilder b(op);
|
||||
// AffineApplyOp are ignored: instantiating the proper vector op will take
|
||||
// care of AffineApplyOps by composing them properly.
|
||||
if (inst->isa<AffineApplyOp>()) {
|
||||
if (op->isa<AffineApplyOp>()) {
|
||||
return false;
|
||||
}
|
||||
if (inst->getNumRegions() != 0)
|
||||
return inst->emitError("NYI path Op with region");
|
||||
if (op->getNumRegions() != 0)
|
||||
return op->emitError("NYI path Op with region");
|
||||
|
||||
if (auto write = inst->dyn_cast<VectorTransferWriteOp>()) {
|
||||
if (auto write = op->dyn_cast<VectorTransferWriteOp>()) {
|
||||
auto *clone = instantiate(&b, write, state->hwVectorType,
|
||||
state->hwVectorInstance, state->substitutionsMap);
|
||||
return clone == nullptr;
|
||||
}
|
||||
if (auto read = inst->dyn_cast<VectorTransferReadOp>()) {
|
||||
if (auto read = op->dyn_cast<VectorTransferReadOp>()) {
|
||||
auto *clone = instantiate(&b, read, state->hwVectorType,
|
||||
state->hwVectorInstance, state->substitutionsMap);
|
||||
if (!clone) {
|
||||
|
@ -574,19 +573,19 @@ static bool instantiateMaterialization(Instruction *inst,
|
|||
// The only op with 0 results reaching this point must, by construction, be
|
||||
// VectorTransferWriteOps and have been caught above. Ops with >= 2 results
|
||||
// are not yet supported. So just support 1 result.
|
||||
if (inst->getNumResults() != 1) {
|
||||
return inst->emitError("NYI: ops with != 1 results");
|
||||
if (op->getNumResults() != 1) {
|
||||
return op->emitError("NYI: ops with != 1 results");
|
||||
}
|
||||
if (inst->getResult(0)->getType() != state->superVectorType) {
|
||||
return inst->emitError("Op does not return a supervector.");
|
||||
if (op->getResult(0)->getType() != state->superVectorType) {
|
||||
return op->emitError("Op does not return a supervector.");
|
||||
}
|
||||
auto *clone =
|
||||
instantiate(&b, inst, state->hwVectorType, state->substitutionsMap);
|
||||
instantiate(&b, op, state->hwVectorType, state->substitutionsMap);
|
||||
if (!clone) {
|
||||
return true;
|
||||
}
|
||||
state->substitutionsMap->insert(
|
||||
std::make_pair(inst->getResult(0), clone->getResult(0)));
|
||||
std::make_pair(op->getResult(0), clone->getResult(0)));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -612,7 +611,7 @@ static bool instantiateMaterialization(Instruction *inst,
|
|||
/// TODO(ntv): full loops + materialized allocs.
|
||||
/// TODO(ntv): partial unrolling + materialized allocs.
|
||||
static bool emitSlice(MaterializationState *state,
|
||||
SetVector<Instruction *> *slice) {
|
||||
SetVector<Operation *> *slice) {
|
||||
auto ratio = shapeRatio(state->superVectorType, state->hwVectorType);
|
||||
assert(ratio.hasValue() &&
|
||||
"ratio of super-vector to HW-vector shape is not integral");
|
||||
|
@ -627,10 +626,10 @@ static bool emitSlice(MaterializationState *state,
|
|||
DenseMap<Value *, Value *> substitutionMap;
|
||||
scopedState.substitutionsMap = &substitutionMap;
|
||||
// slice are topologically sorted, we can just clone them in order.
|
||||
for (auto *inst : *slice) {
|
||||
auto fail = instantiateMaterialization(inst, &scopedState);
|
||||
for (auto *op : *slice) {
|
||||
auto fail = instantiateMaterialization(op, &scopedState);
|
||||
if (fail) {
|
||||
inst->emitError("Unhandled super-vector materialization failure");
|
||||
op->emitError("Unhandled super-vector materialization failure");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -653,7 +652,7 @@ static bool emitSlice(MaterializationState *state,
|
|||
/// Materializes super-vector types into concrete hw vector types as follows:
|
||||
/// 1. start from super-vector terminators (current vector_transfer_write
|
||||
/// ops);
|
||||
/// 2. collect all the instructions that can be reached by transitive use-defs
|
||||
/// 2. collect all the operations that can be reached by transitive use-defs
|
||||
/// chains;
|
||||
/// 3. get the superVectorType for this particular terminator and the
|
||||
/// corresponding hardware vector type (for now limited to F32)
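A hedged sketch of the shape-ratio idea that step 3 relies on, in plain C++ (shapeRatioSketch is an invented name, not the actual MLIR helper): a super-vector shape such as 4x8 over a HW vector shape 8 yields the per-dimension ratio {4, 1}, i.e. four HW-vector instances cover one super-vector value, and the pass clones each op once per instance.

#include <cassert>
#include <cstddef>
#include <vector>

// Sketch: per-dimension ratio of a super-vector shape to a HW vector shape.
std::vector<int> shapeRatioSketch(const std::vector<int> &superShape,
                                  const std::vector<int> &hwShape) {
  assert(superShape.size() >= hwShape.size());
  size_t lead = superShape.size() - hwShape.size();
  // Leading super-vector dimensions have no HW counterpart: each index along
  // them selects a whole HW vector, so they contribute their full extent.
  std::vector<int> ratio(superShape.begin(), superShape.begin() + lead);
  for (size_t i = 0; i < hwShape.size(); ++i) {
    assert(superShape[lead + i] % hwShape[i] == 0 && "non-integral ratio");
    ratio.push_back(superShape[lead + i] / hwShape[i]);
  }
  return ratio;
}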
|
||||
|
@ -664,14 +663,13 @@ static bool emitSlice(MaterializationState *state,
|
|||
/// Notes
|
||||
/// =====
|
||||
/// The `slice` is sorted in topological order by construction.
|
||||
/// Additionally, this set is limited to instructions in the same lexical scope
|
||||
/// Additionally, this set is limited to operations in the same lexical scope
|
||||
/// because we currently disallow vectorization of defs that come from another
|
||||
/// scope.
|
||||
/// TODO(ntv): please document return value.
|
||||
static bool materialize(Function *f,
|
||||
const SetVector<Instruction *> &terminators,
|
||||
static bool materialize(Function *f, const SetVector<Operation *> &terminators,
|
||||
MaterializationState *state) {
|
||||
DenseSet<Instruction *> seen;
|
||||
DenseSet<Operation *> seen;
|
||||
DominanceInfo domInfo(f);
|
||||
for (auto *term : terminators) {
|
||||
// Short-circuit test, a given terminator may have been reached by some
|
||||
|
@ -688,15 +686,15 @@ static bool materialize(Function *f,
|
|||
// Note for the justification of this restriction.
|
||||
// TODO(ntv): relax scoping constraints.
|
||||
auto *enclosingScope = term->getParentOp();
|
||||
auto keepIfInSameScope = [enclosingScope, &domInfo](Instruction *inst) {
|
||||
assert(inst && "NULL inst");
|
||||
auto keepIfInSameScope = [enclosingScope, &domInfo](Operation *op) {
|
||||
assert(op && "NULL op");
|
||||
if (!enclosingScope) {
|
||||
// by construction, everyone is always under the top scope (null scope).
|
||||
return true;
|
||||
}
|
||||
return domInfo.properlyDominates(enclosingScope, inst);
|
||||
return domInfo.properlyDominates(enclosingScope, op);
|
||||
};
|
||||
SetVector<Instruction *> slice =
|
||||
SetVector<Operation *> slice =
|
||||
getSlice(term, keepIfInSameScope, keepIfInSameScope);
|
||||
assert(!slice.empty());
|
||||
|
||||
|
@ -749,16 +747,16 @@ void MaterializeVectorsPass::runOnFunction() {
|
|||
|
||||
// Capture terminators; i.e. vector_transfer_write ops involving a strict
|
||||
// super-vector of subVectorType.
|
||||
auto filter = [subVectorType](Instruction &inst) {
|
||||
if (!inst.isa<VectorTransferWriteOp>()) {
|
||||
auto filter = [subVectorType](Operation &op) {
|
||||
if (!op.isa<VectorTransferWriteOp>()) {
|
||||
return false;
|
||||
}
|
||||
return matcher::operatesOnSuperVectors(inst, subVectorType);
|
||||
return matcher::operatesOnSuperVectors(op, subVectorType);
|
||||
};
|
||||
auto pat = Op(filter);
|
||||
SmallVector<NestedMatch, 8> matches;
|
||||
pat.match(f, &matches);
|
||||
SetVector<Instruction *> terminators;
|
||||
SetVector<Operation *> terminators;
|
||||
for (auto m : matches) {
|
||||
terminators.insert(m.getMatchedOperation());
|
||||
}
@ -54,8 +54,8 @@ namespace {
|
|||
// iteration of the innermost loop enclosing both the store op and the load op.
|
||||
//
|
||||
// (* A dependence being satisfied at a block: a dependence that is satisfied by
|
||||
// virtue of the destination instruction appearing textually / lexically after
|
||||
// the source instruction within the body of a 'affine.for' instruction; thus, a
|
||||
// virtue of the destination operation appearing textually / lexically after
|
||||
// the source operation within the body of a 'affine.for' operation; thus, a
|
||||
// dependence is always either satisfied by a loop or by a block).
|
||||
//
|
||||
// The above conditions are simple to check, sufficient, and powerful for most
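A minimal plain-C++ sketch of the intended effect, assuming the conditions above hold (function and variable names are invented; this is the result of the transformation, not the pass itself):

#include <vector>

// Sketch: store-to-load forwarding when the store writes the same element in
// the same iteration and post-dominates every other store to it.
int f(int i) { return i * i; }

int beforeForwarding(int n) {
  std::vector<int> a(n);
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    a[i] = f(i); // store
    sum += a[i]; // load of the value just stored
  }
  return sum;
}

int afterForwarding(int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    int v = f(i); // the stored value is forwarded to the load...
    sum += v;     // ...and 'a' becomes dead, so its allocation can be erased
  }
  return sum;
}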
|
||||
|
@ -77,7 +77,7 @@ struct MemRefDataFlowOpt : public FunctionPass<MemRefDataFlowOpt> {
|
|||
// A list of memref's that are potentially dead / could be eliminated.
|
||||
SmallPtrSet<Value *, 4> memrefsToErase;
|
||||
// Load op's whose results were replaced by those forwarded from stores.
|
||||
std::vector<Instruction *> loadOpsToErase;
|
||||
std::vector<Operation *> loadOpsToErase;
|
||||
|
||||
DominanceInfo *domInfo = nullptr;
|
||||
PostDominanceInfo *postDomInfo = nullptr;
|
||||
|
@ -94,13 +94,13 @@ FunctionPassBase *mlir::createMemRefDataFlowOptPass() {
|
|||
// This is a straightforward implementation not optimized for speed. Optimize
|
||||
// this in the future if needed.
|
||||
void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) {
|
||||
Instruction *lastWriteStoreOp = nullptr;
|
||||
Instruction *loadOpInst = loadOp.getOperation();
|
||||
Operation *lastWriteStoreOp = nullptr;
|
||||
Operation *loadOpInst = loadOp.getOperation();
|
||||
|
||||
// First pass over the use list to get minimum number of surrounding
|
||||
// loops common between the load op and the store op, with min taken across
|
||||
// all store ops.
|
||||
SmallVector<Instruction *, 8> storeOps;
|
||||
SmallVector<Operation *, 8> storeOps;
|
||||
unsigned minSurroundingLoops = getNestingDepth(*loadOpInst);
|
||||
for (InstOperand &use : loadOp.getMemRef()->getUses()) {
|
||||
auto storeOp = use.getOwner()->dyn_cast<StoreOp>();
|
||||
|
@ -119,11 +119,11 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) {
|
|||
// and loadOp.
|
||||
// The list of store op candidates for forwarding - need to satisfy the
|
||||
// conditions listed at the top.
|
||||
SmallVector<Instruction *, 8> fwdingCandidates;
|
||||
SmallVector<Operation *, 8> fwdingCandidates;
|
||||
// Store ops that have a dependence into the load (even if they aren't
|
||||
// forwarding candidates). Each forwarding candidate will be checked for a
|
||||
// post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores.
|
||||
SmallVector<Instruction *, 8> depSrcStores;
|
||||
SmallVector<Operation *, 8> depSrcStores;
|
||||
for (auto *storeOpInst : storeOps) {
|
||||
MemRefAccess srcAccess(storeOpInst);
|
||||
MemRefAccess destAccess(loadOpInst);
|
||||
|
@ -186,7 +186,7 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) {
|
|||
// that postdominates all 'depSrcStores' (if such a store exists) is the
|
||||
// unique store providing the value to the load, i.e., provably the last
|
||||
// writer to that memref loc.
|
||||
if (llvm::all_of(depSrcStores, [&](Instruction *depStore) {
|
||||
if (llvm::all_of(depSrcStores, [&](Operation *depStore) {
|
||||
return postDomInfo->postDominates(storeOpInst, depStore);
|
||||
})) {
|
||||
lastWriteStoreOp = storeOpInst;
|
||||
|
@ -236,9 +236,9 @@ void MemRefDataFlowOpt::runOnFunction() {
|
|||
// to do this as well, but we'll do it here since we collected these anyway.
|
||||
for (auto *memref : memrefsToErase) {
|
||||
// If the memref hasn't been alloc'ed in this function, skip.
|
||||
Instruction *defInst = memref->getDefiningOp();
|
||||
Operation *defInst = memref->getDefiningOp();
|
||||
if (!defInst || !defInst->isa<AllocOp>())
|
||||
// TODO(mlir-team): if the memref was returned by a 'call' instruction, we
|
||||
// TODO(mlir-team): if the memref was returned by a 'call' operation, we
|
||||
// could still erase it if the call had no side-effects.
|
||||
continue;
|
||||
if (std::any_of(memref->use_begin(), memref->use_end(),
@ -53,23 +53,23 @@ FunctionPassBase *mlir::createPipelineDataTransferPass() {
|
|||
return new PipelineDataTransfer();
|
||||
}
|
||||
|
||||
// Returns the position of the tag memref operand given a DMA instruction.
|
||||
// Returns the position of the tag memref operand given a DMA operation.
|
||||
// Temporary utility: will be replaced when DmaStart/DmaFinish abstract op's are
|
||||
// added. TODO(b/117228571)
|
||||
static unsigned getTagMemRefPos(Instruction &dmaInst) {
|
||||
static unsigned getTagMemRefPos(Operation &dmaInst) {
|
||||
assert(dmaInst.isa<DmaStartOp>() || dmaInst.isa<DmaWaitOp>());
|
||||
if (dmaInst.isa<DmaStartOp>()) {
|
||||
// Second to last operand.
|
||||
return dmaInst.getNumOperands() - 2;
|
||||
}
|
||||
// First operand for a dma finish instruction.
|
||||
// First operand for a dma finish operation.
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Doubles the buffer of the supplied memref on the specified 'affine.for'
|
||||
/// instruction by adding a leading dimension of size two to the memref.
|
||||
/// operation by adding a leading dimension of size two to the memref.
|
||||
/// Replaces all uses of the old memref by the new one while indexing the newly
|
||||
/// added dimension by the loop IV of the specified 'affine.for' instruction
|
||||
/// added dimension by the loop IV of the specified 'affine.for' operation
|
||||
/// modulo 2. Returns false if such a replacement cannot be performed.
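A minimal plain-C++ sketch of the double-buffering effect described above (illustrative only; the buffer size and the commented-out transfer call are invented names, not MLIR ops):

#include <vector>

// Sketch: the memref gains a leading dimension of size two, and every access
// indexes that dimension with (iv % 2), so the transfer into one half can
// overlap with computation on the other half.
void pipelineSketch(int n) {
  std::vector<std::vector<float>> buf(2, std::vector<float>(256));
  for (int i = 0; i < n; ++i) {
    auto &cur = buf[i % 2]; // was: a single buffer 'buf'
    // startTransferInto(buf[(i + 1) % 2]);  // hypothetical prefetch of the
    //                                       // next iteration's data
    for (float &x : cur)
      x += 1.0f; // compute on the current half
  }
}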
|
||||
static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
|
||||
auto *forBody = forOp.getBody();
|
||||
|
@ -104,7 +104,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
|
|||
dynamicDimCount++));
|
||||
}
|
||||
|
||||
// Create and place the alloc right before the 'affine.for' instruction.
|
||||
// Create and place the alloc right before the 'affine.for' operation.
|
||||
Value *newMemRef =
|
||||
bOuter.create<AllocOp>(forInst->getLoc(), newMemRefType, allocOperands);
|
||||
|
||||
|
@ -139,7 +139,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
|
|||
/// Returns success if the IR is in a valid state.
|
||||
void PipelineDataTransfer::runOnFunction() {
|
||||
// Do a post order walk so that inner loop DMAs are processed first. This is
|
||||
// necessary since 'affine.for' instructions nested within would otherwise
|
||||
// necessary since 'affine.for' operations nested within would otherwise
|
||||
// become invalid (erased) when the outer loop is pipelined (the pipelined one
|
||||
// gets deleted and replaced by a prologue, a new steady-state loop and an
|
||||
// epilogue).
|
||||
|
@ -173,27 +173,27 @@ static bool checkTagMatch(DmaStartOp startOp, DmaWaitOp waitOp) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Identify matching DMA start/finish instructions to overlap computation with.
|
||||
// Identify matching DMA start/finish operations to overlap computation with.
|
||||
static void findMatchingStartFinishInsts(
|
||||
AffineForOp forOp,
|
||||
SmallVectorImpl<std::pair<Instruction *, Instruction *>> &startWaitPairs) {
|
||||
SmallVectorImpl<std::pair<Operation *, Operation *>> &startWaitPairs) {
|
||||
|
||||
// Collect outgoing DMA instructions - needed to check for dependences below.
|
||||
// Collect outgoing DMA operations - needed to check for dependences below.
|
||||
SmallVector<DmaStartOp, 4> outgoingDmaOps;
|
||||
for (auto &inst : *forOp.getBody()) {
|
||||
auto dmaStartOp = inst.dyn_cast<DmaStartOp>();
|
||||
for (auto &op : *forOp.getBody()) {
|
||||
auto dmaStartOp = op.dyn_cast<DmaStartOp>();
|
||||
if (dmaStartOp && dmaStartOp.isSrcMemorySpaceFaster())
|
||||
outgoingDmaOps.push_back(dmaStartOp);
|
||||
}
|
||||
|
||||
SmallVector<Instruction *, 4> dmaStartInsts, dmaFinishInsts;
|
||||
for (auto &inst : *forOp.getBody()) {
|
||||
// Collect DMA finish instructions.
|
||||
if (inst.isa<DmaWaitOp>()) {
|
||||
dmaFinishInsts.push_back(&inst);
|
||||
SmallVector<Operation *, 4> dmaStartInsts, dmaFinishInsts;
|
||||
for (auto &op : *forOp.getBody()) {
|
||||
// Collect DMA finish operations.
|
||||
if (op.isa<DmaWaitOp>()) {
|
||||
dmaFinishInsts.push_back(&op);
|
||||
continue;
|
||||
}
|
||||
auto dmaStartOp = inst.dyn_cast<DmaStartOp>();
|
||||
auto dmaStartOp = op.dyn_cast<DmaStartOp>();
|
||||
if (!dmaStartOp)
|
||||
continue;
|
||||
|
||||
|
@ -228,10 +228,10 @@ static void findMatchingStartFinishInsts(
|
|||
}
|
||||
}
|
||||
if (!escapingUses)
|
||||
dmaStartInsts.push_back(&inst);
|
||||
dmaStartInsts.push_back(&op);
|
||||
}
|
||||
|
||||
// For each start instruction, we look for a matching finish instruction.
|
||||
// For each start operation, we look for a matching finish operation.
|
||||
for (auto *dmaStartInst : dmaStartInsts) {
|
||||
for (auto *dmaFinishInst : dmaFinishInsts) {
|
||||
if (checkTagMatch(dmaStartInst->cast<DmaStartOp>(),
|
||||
|
@ -253,7 +253,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
|
|||
return;
|
||||
}
|
||||
|
||||
SmallVector<std::pair<Instruction *, Instruction *>, 4> startWaitPairs;
|
||||
SmallVector<std::pair<Operation *, Operation *>, 4> startWaitPairs;
|
||||
findMatchingStartFinishInsts(forOp, startWaitPairs);
|
||||
|
||||
if (startWaitPairs.empty()) {
|
||||
|
@ -263,7 +263,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
|
|||
|
||||
// Double the buffers for the higher memory space memref's.
|
||||
// Identify memref's to replace by scanning through all DMA start
|
||||
// instructions. A DMA start instruction has two memref's - the one from the
|
||||
// operations. A DMA start operation has two memref's - the one from the
|
||||
// higher level of memory hierarchy is the one to double buffer.
|
||||
// TODO(bondhugula): check whether double-buffering is even necessary.
|
||||
// TODO(bondhugula): make this work with different layouts: assuming here that
|
||||
|
@ -320,13 +320,13 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
|
|||
startWaitPairs.clear();
|
||||
findMatchingStartFinishInsts(forOp, startWaitPairs);
|
||||
|
||||
// Store shift for instruction for later lookup for AffineApplyOp's.
|
||||
DenseMap<Instruction *, unsigned> instShiftMap;
|
||||
// Store shift for operation for later lookup for AffineApplyOp's.
|
||||
DenseMap<Operation *, unsigned> instShiftMap;
|
||||
for (auto &pair : startWaitPairs) {
|
||||
auto *dmaStartInst = pair.first;
|
||||
assert(dmaStartInst->isa<DmaStartOp>());
|
||||
instShiftMap[dmaStartInst] = 0;
|
||||
// Set shifts for DMA start inst's affine operand computation slices to 0.
|
||||
// Set shifts for DMA start op's affine operand computation slices to 0.
|
||||
SmallVector<AffineApplyOp, 4> sliceOps;
|
||||
mlir::createAffineComputationSlice(dmaStartInst, &sliceOps);
|
||||
if (!sliceOps.empty()) {
|
||||
|
@ -336,32 +336,32 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
|
|||
} else {
|
||||
// If a slice wasn't created, the reachable affine.apply op's from its
|
||||
// operands are the ones that go with it.
|
||||
SmallVector<Instruction *, 4> affineApplyInsts;
|
||||
SmallVector<Operation *, 4> affineApplyInsts;
|
||||
SmallVector<Value *, 4> operands(dmaStartInst->getOperands());
|
||||
getReachableAffineApplyOps(operands, affineApplyInsts);
|
||||
for (auto *inst : affineApplyInsts) {
|
||||
instShiftMap[inst] = 0;
|
||||
for (auto *op : affineApplyInsts) {
|
||||
instShiftMap[op] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Everything else (including compute ops and dma finish) are shifted by one.
|
||||
for (auto &inst : *forOp.getBody()) {
|
||||
if (instShiftMap.find(&inst) == instShiftMap.end()) {
|
||||
instShiftMap[&inst] = 1;
|
||||
for (auto &op : *forOp.getBody()) {
|
||||
if (instShiftMap.find(&op) == instShiftMap.end()) {
|
||||
instShiftMap[&op] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Get shifts stored in map.
|
||||
std::vector<uint64_t> shifts(forOp.getBody()->getOperations().size());
|
||||
unsigned s = 0;
|
||||
for (auto &inst : *forOp.getBody()) {
|
||||
assert(instShiftMap.find(&inst) != instShiftMap.end());
|
||||
shifts[s++] = instShiftMap[&inst];
|
||||
for (auto &op : *forOp.getBody()) {
|
||||
assert(instShiftMap.find(&op) != instShiftMap.end());
|
||||
shifts[s++] = instShiftMap[&op];
|
||||
|
||||
// Tagging instructions with shifts for debugging purposes.
|
||||
// Tagging operations with shifts for debugging purposes.
|
||||
LLVM_DEBUG({
|
||||
FuncBuilder b(&inst);
|
||||
inst.setAttr("shift", b.getI64IntegerAttr(shifts[s - 1]));
|
||||
FuncBuilder b(&op);
|
||||
op.setAttr("shift", b.getI64IntegerAttr(shifts[s - 1]));
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -372,7 +372,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
|
|||
}
|
||||
|
||||
if (failed(instBodySkew(forOp, shifts))) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "inst body skewing failed - unexpected\n";);
|
||||
LLVM_DEBUG(llvm::dbgs() << "op body skewing failed - unexpected\n";);
|
||||
return;
|
||||
}
|
||||
}
@ -32,7 +32,7 @@ using namespace mlir;
|
|||
|
||||
namespace {
|
||||
|
||||
/// Simplifies all affine expressions appearing in the operation instructions of
|
||||
/// Simplifies all affine expressions appearing in the operations of
|
||||
/// the Function. This is mainly to test the simplifyAffineExpr method.
|
||||
/// TODO(someone): This should just be defined as a canonicalization pattern
|
||||
/// on AffineMap and driven from the existing canonicalization pass.
|
||||
|
@ -41,9 +41,9 @@ struct SimplifyAffineStructures
|
|||
void runOnFunction() override;
|
||||
|
||||
/// Utility to simplify an affine attribute and update its entry in the parent
|
||||
/// instruction if necessary.
|
||||
/// operation if necessary.
|
||||
template <typename AttributeT>
|
||||
void simplifyAndUpdateAttribute(Instruction *inst, Identifier name,
|
||||
void simplifyAndUpdateAttribute(Operation *op, Identifier name,
|
||||
AttributeT attr) {
|
||||
auto &simplified = simplifiedAttributes[attr];
|
||||
if (simplified == attr)
|
||||
|
@ -62,7 +62,7 @@ struct SimplifyAffineStructures
|
|||
}
|
||||
|
||||
// Simplification was successful, so update the attribute.
|
||||
inst->setAttr(name, simplified);
|
||||
op->setAttr(name, simplified);
|
||||
}
|
||||
|
||||
/// Performs basic integer set simplifications. Checks if it's empty, and
|
||||
|
@ -93,7 +93,7 @@ FunctionPassBase *mlir::createSimplifyAffineStructuresPass() {
|
|||
|
||||
void SimplifyAffineStructures::runOnFunction() {
|
||||
simplifiedAttributes.clear();
|
||||
getFunction().walk([&](Instruction *opInst) {
|
||||
getFunction().walk([&](Operation *opInst) {
|
||||
for (auto attr : opInst->getAttrs()) {
|
||||
if (auto mapAttr = attr.second.dyn_cast<AffineMapAttr>())
|
||||
simplifyAndUpdateAttribute(opInst, attr.first, mapAttr);
@ -32,9 +32,9 @@ void StripDebugInfo::runOnFunction() {
|
|||
Function &func = getFunction();
|
||||
UnknownLoc unknownLoc = UnknownLoc::get(&getContext());
|
||||
|
||||
// Strip the debug info from the function and its instructions.
|
||||
// Strip the debug info from the function and its operations.
|
||||
func.setLoc(unknownLoc);
|
||||
func.walk([&](Instruction *inst) { inst->setLoc(unknownLoc); });
|
||||
func.walk([&](Operation *op) { op->setLoc(unknownLoc); });
|
||||
}
|
||||
|
||||
/// Creates a pass to strip debug information from a function.
|
||||
|
@ -43,4 +43,4 @@ FunctionPassBase *mlir::createStripDebugInfoPass() {
|
|||
}
|
||||
|
||||
static PassRegistration<StripDebugInfo>
|
||||
pass("strip-debuginfo", "Strip debug info from functions and instructions");
|
||||
pass("strip-debuginfo", "Strip debug info from functions and operations");
@ -39,13 +39,13 @@ public:
|
|||
worklist.reserve(64);
|
||||
|
||||
// Add all operations to the worklist.
|
||||
fn.walk([&](Instruction *inst) { addToWorklist(inst); });
|
||||
fn.walk([&](Operation *op) { addToWorklist(op); });
|
||||
}
|
||||
|
||||
/// Perform the rewrites.
|
||||
void simplifyFunction();
|
||||
|
||||
void addToWorklist(Instruction *op) {
|
||||
void addToWorklist(Operation *op) {
|
||||
// Check to see if the worklist already contains this op.
|
||||
if (worklistMap.count(op))
|
||||
return;
|
||||
|
@ -54,7 +54,7 @@ public:
|
|||
worklist.push_back(op);
|
||||
}
|
||||
|
||||
Instruction *popFromWorklist() {
|
||||
Operation *popFromWorklist() {
|
||||
auto *op = worklist.back();
|
||||
worklist.pop_back();
|
||||
|
||||
|
@ -66,7 +66,7 @@ public:
|
|||
|
||||
/// If the specified operation is in the worklist, remove it. If not, this is
|
||||
/// a no-op.
|
||||
void removeFromWorklist(Instruction *op) {
|
||||
void removeFromWorklist(Operation *op) {
|
||||
auto it = worklistMap.find(op);
|
||||
if (it != worklistMap.end()) {
|
||||
assert(worklist[it->second] == op && "malformed worklist data structure");
|
||||
|
@ -78,7 +78,7 @@ public:
|
|||
protected:
|
||||
// Implement the hook for creating operations, and make sure that newly
|
||||
// created ops are added to the worklist for processing.
|
||||
Instruction *createOperation(const OperationState &state) override {
|
||||
Operation *createOperation(const OperationState &state) override {
|
||||
auto *result = builder.createOperation(state);
|
||||
addToWorklist(result);
|
||||
return result;
|
||||
|
@ -86,7 +86,7 @@ protected:
|
|||
|
||||
// If an operation is about to be removed, make sure it is not in our
|
||||
// worklist anymore because we'd get dangling references to it.
|
||||
void notifyOperationRemoved(Instruction *op) override {
|
||||
void notifyOperationRemoved(Operation *op) override {
|
||||
addToWorklist(op->getOperands());
|
||||
removeFromWorklist(op);
|
||||
}
|
||||
|
@ -94,7 +94,7 @@ protected:
|
|||
// When the root of a pattern is about to be replaced, it can trigger
|
||||
// simplifications to its users - make sure to add them to the worklist
|
||||
// before the root is changed.
|
||||
void notifyRootReplaced(Instruction *op) override {
|
||||
void notifyRootReplaced(Operation *op) override {
|
||||
for (auto *result : op->getResults())
|
||||
// TODO: Add a result->getUsers() iterator.
|
||||
for (auto &user : result->getUses())
|
||||
|
@ -102,15 +102,15 @@ protected:
|
|||
}
|
||||
|
||||
private:
|
||||
// Look over the provided operands for any defining instructions that should
|
||||
// Look over the provided operands for any defining operations that should
|
||||
// be re-added to the worklist. This function should be called when an
|
||||
// operation is modified or removed, as it may trigger further
|
||||
// simplifications.
|
||||
template <typename Operands> void addToWorklist(Operands &&operands) {
|
||||
for (Value *operand : operands) {
|
||||
// If the use count of this operand is now < 2, we re-add the defining
|
||||
// instruction to the worklist.
|
||||
// TODO(riverriddle) This is based on the fact that zero use instructions
|
||||
// operation to the worklist.
|
||||
// TODO(riverriddle) This is based on the fact that zero use operations
|
||||
// may be deleted, and that single use values often have more
|
||||
// canonicalization opportunities.
|
||||
if (!operand->use_empty() &&
|
||||
|
@ -131,13 +131,13 @@ private:
|
|||
/// need to be revisited, plus their index in the worklist. This allows us to
|
||||
/// efficiently remove operations from the worklist when they are erased from
|
||||
/// the function, even if they aren't the root of a pattern.
|
||||
std::vector<Instruction *> worklist;
|
||||
DenseMap<Instruction *, unsigned> worklistMap;
|
||||
std::vector<Operation *> worklist;
|
||||
DenseMap<Operation *, unsigned> worklistMap;
|
||||
|
||||
/// As part of canonicalization, we move constants to the top of the entry
|
||||
/// block of the current function and de-duplicate them. This keeps track of
|
||||
/// constants we have done this for.
|
||||
DenseMap<std::pair<Attribute, Type>, Instruction *> uniquedConstants;
|
||||
DenseMap<std::pair<Attribute, Type>, Operation *> uniquedConstants;
|
||||
};
|
||||
}; // end anonymous namespace
|
||||
|
||||
|
@ -199,7 +199,7 @@ void GreedyPatternRewriteDriver::simplifyFunction() {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Check to see if any operands to the instruction is constant and whether
|
||||
// Check to see if any operands to the operation is constant and whether
|
||||
// the operation knows how to constant fold itself.
|
||||
operandConstants.assign(op->getNumOperands(), Attribute());
|
||||
for (unsigned i = 0, e = op->getNumOperands(); i != e; ++i)
@ -123,10 +123,10 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
|
|||
|
||||
// Replaces all IV uses to its single iteration value.
|
||||
auto *iv = forOp.getInductionVar();
|
||||
Instruction *forInst = forOp.getOperation();
|
||||
Operation *op = forOp.getOperation();
|
||||
if (!iv->use_empty()) {
|
||||
if (forOp.hasConstantLowerBound()) {
|
||||
auto *mlFunc = forInst->getFunction();
|
||||
auto *mlFunc = op->getFunction();
|
||||
FuncBuilder topBuilder(mlFunc);
|
||||
auto constOp = topBuilder.create<ConstantIndexOp>(
|
||||
forOp.getLoc(), forOp.getConstantLowerBound());
|
||||
|
@ -134,28 +134,28 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
|
|||
} else {
|
||||
AffineBound lb = forOp.getLowerBound();
|
||||
SmallVector<Value *, 4> lbOperands(lb.operand_begin(), lb.operand_end());
|
||||
FuncBuilder builder(forInst->getBlock(), Block::iterator(forInst));
|
||||
FuncBuilder builder(op->getBlock(), Block::iterator(op));
|
||||
if (lb.getMap() == builder.getDimIdentityMap()) {
|
||||
// No need of generating an affine.apply.
|
||||
iv->replaceAllUsesWith(lbOperands[0]);
|
||||
} else {
|
||||
auto affineApplyOp = builder.create<AffineApplyOp>(
|
||||
forInst->getLoc(), lb.getMap(), lbOperands);
|
||||
op->getLoc(), lb.getMap(), lbOperands);
|
||||
iv->replaceAllUsesWith(affineApplyOp);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Move the loop body instructions, except for terminator, to the loop's
|
||||
// Move the loop body operations, except for terminator, to the loop's
|
||||
// containing block.
|
||||
auto *block = forInst->getBlock();
|
||||
auto *block = op->getBlock();
|
||||
forOp.getBody()->getOperations().back().erase();
|
||||
block->getOperations().splice(Block::iterator(forInst),
|
||||
block->getOperations().splice(Block::iterator(op),
|
||||
forOp.getBody()->getOperations());
|
||||
forOp.erase();
|
||||
return success();
|
||||
}
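A minimal plain-C++ sketch of the effect of this promotion (illustrative only; body() and the constant bound are invented):

#include <cstdio>

// Sketch: promoting a loop whose trip count is provably one, as
// promoteIfSingleIteration does for affine.for.
void body(int i) { std::printf("body(%d)\n", i); }

void beforePromotion() {
  for (int i = 5; i < 6; ++i) // trip count == 1
    body(i);
}

void afterPromotion() {
  body(5); // uses of the IV are replaced by the lower bound; the loop is gone
}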
|
||||
|
||||
/// Promotes all single iteration for inst's in the Function, i.e., moves
|
||||
/// Promotes all single iteration for op's in the Function, i.e., moves
|
||||
/// their body into the containing Block.
|
||||
void mlir::promoteSingleIterationLoops(Function *f) {
|
||||
// Gathers all innermost loops through a post order pruned walk.
|
||||
|
@ -163,16 +163,16 @@ void mlir::promoteSingleIterationLoops(Function *f) {
|
|||
[](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
|
||||
}
|
||||
|
||||
/// Generates a 'affine.for' inst with the specified lower and upper bounds
|
||||
/// while generating the right IV remappings for the shifted instructions. The
|
||||
/// instruction blocks that go into the loop are specified in instGroupQueue
|
||||
/// Generates a 'affine.for' op with the specified lower and upper bounds
|
||||
/// while generating the right IV remappings for the shifted operations. The
|
||||
/// operation blocks that go into the loop are specified in instGroupQueue
|
||||
/// starting from the specified offset, and in that order; the first element of
|
||||
/// the pair specifies the shift applied to that group of instructions; note
|
||||
/// the pair specifies the shift applied to that group of operations; note
|
||||
/// that the shift is multiplied by the loop step before being applied. Returns
|
||||
/// nullptr if the generated loop simplifies to a single iteration one.
|
||||
static AffineForOp
|
||||
generateLoop(AffineMap lbMap, AffineMap ubMap,
|
||||
const std::vector<std::pair<uint64_t, ArrayRef<Instruction *>>>
|
||||
const std::vector<std::pair<uint64_t, ArrayRef<Operation *>>>
|
||||
&instGroupQueue,
|
||||
unsigned offset, AffineForOp srcForInst, FuncBuilder *b) {
|
||||
SmallVector<Value *, 4> lbOperands(srcForInst.getLowerBoundOperands());
|
||||
|
@ -194,8 +194,8 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
|
|||
it != e; ++it) {
|
||||
uint64_t shift = it->first;
|
||||
auto insts = it->second;
|
||||
// All 'same shift' instructions get added with their operands being
|
||||
// remapped to results of cloned instructions, and their IV used remapped.
|
||||
// All 'same shift' operations get added with their operands being
|
||||
// remapped to results of cloned operations, and their IV used remapped.
|
||||
// Generate the remapping if the shift is not zero: remappedIV = newIV -
|
||||
// shift.
|
||||
if (!srcIV->use_empty() && shift != 0) {
|
||||
|
@ -208,9 +208,9 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
|
|||
} else {
|
||||
operandMap.map(srcIV, loopChunkIV);
|
||||
}
|
||||
for (auto *inst : insts) {
|
||||
if (!inst->isa<AffineTerminatorOp>())
|
||||
bodyBuilder.clone(*inst, operandMap);
|
||||
for (auto *op : insts) {
|
||||
if (!op->isa<AffineTerminatorOp>())
|
||||
bodyBuilder.clone(*op, operandMap);
|
||||
}
|
||||
};
|
||||
if (succeeded(promoteIfSingleIteration(loopChunk)))
|
||||
|
@ -218,17 +218,17 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
|
|||
return loopChunk;
|
||||
}
|
||||
|
||||
/// Skew the instructions in the body of a 'affine.for' instruction with the
|
||||
/// specified instruction-wise shifts. The shifts are with respect to the
|
||||
/// Skew the operations in the body of a 'affine.for' operation with the
|
||||
/// specified operation-wise shifts. The shifts are with respect to the
|
||||
/// original execution order, and are multiplied by the loop 'step' before being
|
||||
/// applied. A shift of zero for each instruction will lead to no change.
|
||||
// The skewing of instructions with respect to one another can be used for
|
||||
/// applied. A shift of zero for each operation will lead to no change.
|
||||
// The skewing of operations with respect to one another can be used for
|
||||
// example to allow overlap of asynchronous operations (such as DMA
|
||||
// communication) with computation, or just relative shifting of instructions
|
||||
// communication) with computation, or just relative shifting of operations
|
||||
// for better register reuse, locality or parallelism. As such, the shifts are
|
||||
// typically expected to be at most of the order of the number of instructions.
|
||||
// typically expected to be at most of the order of the number of operations.
|
||||
// This method should not be used as a substitute for loop distribution/fission.
|
||||
// This method uses an algorithm// in time linear in the number of instructions
|
||||
// This method uses an algorithm in time linear in the number of operations
|
||||
// in the body of the for loop - (using the 'sweep line' paradigm). This method
|
||||
// asserts preservation of SSA dominance. A check for that as well as that for
|
||||
// memory-based dependence preservation check rests with the users of this
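A minimal plain-C++ sketch of the skewing effect for two statements with shifts {0, 1} and step 1 (illustrative only; statement names are invented, and the dominance/dependence checks mentioned above are assumed to have passed):

#include <cstdio>

// Sketch: with shifts {0, 1}, S2 of iteration i runs alongside S1 of
// iteration i+1, which is how a DMA wait can be overlapped with the next
// iteration's compute.
void s1(int i) { std::printf("S1(%d)\n", i); }
void s2(int i) { std::printf("S2(%d)\n", i); }

void skewed(int lb, int ub) {
  if (lb >= ub)
    return;
  s1(lb); // prologue: shift-0 group of the first iteration
  for (int i = lb; i < ub - 1; ++i) {
    s2(i);     // steady state: shifted group of iteration i...
    s1(i + 1); // ...together with the shift-0 group of iteration i+1
  }
  s2(ub - 1); // epilogue: finish the last iteration
}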
|
||||
|
@ -267,14 +267,14 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
|
|||
return success();
|
||||
}
|
||||
|
||||
// An array of instruction groups sorted by shift amount; each group has all
|
||||
// instructions with the same shift in the order in which they appear in the
|
||||
// body of the 'affine.for' inst.
|
||||
std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
|
||||
// An array of operation groups sorted by shift amount; each group has all
|
||||
// operations with the same shift in the order in which they appear in the
|
||||
// body of the 'affine.for' op.
|
||||
std::vector<std::vector<Operation *>> sortedInstGroups(maxShift + 1);
|
||||
unsigned pos = 0;
|
||||
for (auto &inst : *forOp.getBody()) {
|
||||
for (auto &op : *forOp.getBody()) {
|
||||
auto shift = shifts[pos++];
|
||||
sortedInstGroups[shift].push_back(&inst);
|
||||
sortedInstGroups[shift].push_back(&op);
|
||||
}
|
||||
|
||||
// Unless the shifts have a specific pattern (which actually would be the
|
||||
|
@ -287,8 +287,8 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
|
|||
|
||||
// Do a sweep over the sorted shifts while storing open groups in a
|
||||
// vector, and generating loop portions as necessary during the sweep. A block
|
||||
// of instructions is paired with its shift.
|
||||
std::vector<std::pair<uint64_t, ArrayRef<Instruction *>>> instGroupQueue;
|
||||
// of operations is paired with its shift.
|
||||
std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> instGroupQueue;
|
||||
|
||||
auto origLbMap = forOp.getLowerBoundMap();
|
||||
uint64_t lbShift = 0;
|
||||
|
@ -302,14 +302,14 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
|
|||
"Queue expected to be empty when the first block is found");
|
||||
// The interval for which the loop needs to be generated here is:
|
||||
// [lbShift, min(lbShift + tripCount, d)) and the body of the
|
||||
// loop needs to have all instructions in instQueue in that order.
|
||||
// loop needs to have all operations in instQueue in that order.
|
||||
AffineForOp res;
|
||||
if (lbShift + tripCount * step < d * step) {
|
||||
res = generateLoop(
|
||||
b.getShiftedAffineMap(origLbMap, lbShift),
|
||||
b.getShiftedAffineMap(origLbMap, lbShift + tripCount * step),
|
||||
instGroupQueue, 0, forOp, &b);
|
||||
// Entire loop for the queued inst groups generated, empty it.
|
||||
// Entire loop for the queued op groups generated, empty it.
|
||||
instGroupQueue.clear();
|
||||
lbShift += tripCount * step;
|
||||
} else {
|
||||
|
@ -325,11 +325,11 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
|
|||
// Start of first interval.
|
||||
lbShift = d * step;
|
||||
}
|
||||
// Augment the list of instructions that get into the current open interval.
|
||||
// Augment the list of operations that get into the current open interval.
|
||||
instGroupQueue.push_back({d, sortedInstGroups[d]});
|
||||
}
|
||||
|
||||
// Those instructions groups left in the queue now need to be processed (FIFO)
|
||||
// Those operations groups left in the queue now need to be processed (FIFO)
|
||||
// and their loops completed.
|
||||
for (unsigned i = 0, e = instGroupQueue.size(); i < e; ++i) {
|
||||
uint64_t ubShift = (instGroupQueue[i].first + tripCount) * step;
|
||||
|
@ -341,7 +341,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
|
|||
prologue = epilogue;
|
||||
}
|
||||
|
||||
// Erase the original for inst.
|
||||
// Erase the original for op.
|
||||
forOp.erase();
|
||||
|
||||
if (unrollPrologueEpilogue && prologue)
|
||||
|
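For illustration, a minimal sketch of a caller of the skew utility documented above: one shift per operation of the loop body, in program order, each multiplied by the loop step inside the utility. `mlir::instBodySkew` and `AffineForOp` are the entities shown in the hunks; the driver function, the include paths, and the assumption that the third argument is the `unrollPrologueEpilogue` flag referenced above are mine.

```cpp
// Illustrative sketch only; include paths and the driver are assumed.
#include "mlir/AffineOps/AffineOps.h"
#include "mlir/Transforms/LoopUtils.h"
#include "llvm/ADT/SmallVector.h"

// Shift the i-th operation of the body by i iterations so that operations
// from successive iterations can overlap (e.g. DMA with compute).
static void skewBodyByPosition(mlir::AffineForOp forOp) {
  llvm::SmallVector<uint64_t, 8> shifts;
  for (auto &op : *forOp.getBody()) {
    (void)op; // one shift entry per operation, in program order
    shifts.push_back(shifts.size());
  }
  // Returns failure and leaves the loop untouched if the preconditions
  // (e.g. a known trip count) do not hold.
  (void)mlir::instBodySkew(forOp, shifts, /*unrollPrologueEpilogue=*/false);
}
```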
@@ -407,10 +407,10 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
 return failure();

 // Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
-Instruction *forInst = forOp.getOperation();
+Operation *op = forOp.getOperation();
 if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
-FuncBuilder builder(forInst->getBlock(), ++Block::iterator(forInst));
-auto cleanupForInst = builder.clone(*forInst)->cast<AffineForOp>();
+FuncBuilder builder(op->getBlock(), ++Block::iterator(op));
+auto cleanupForInst = builder.clone(*op)->cast<AffineForOp>();
 AffineMap cleanupMap;
 SmallVector<Value *, 4> cleanupOperands;
 getCleanupLoopLowerBound(forOp, unrollFactor, &cleanupMap, &cleanupOperands,
@@ -435,7 +435,7 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
 // 'forOp'.
 FuncBuilder builder = forOp.getBodyBuilder();

-// Keep a pointer to the last non-terminator instruction in the original block
+// Keep a pointer to the last non-terminator operation in the original block
 // so that we know what to clone (since we are doing this in-place).
 Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2);

@@ -530,17 +530,17 @@ static void cloneLoopBodyInto(AffineForOp forOp, Value *oldIv,
 BlockAndValueMapping map;
 map.map(oldIv, newForOp.getInductionVar());
 FuncBuilder b = newForOp.getBodyBuilder();
-for (auto &inst : *forOp.getBody()) {
+for (auto &op : *forOp.getBody()) {
 // Step over newForOp in case it is nested under forOp.
-if (&inst == newForOp.getOperation()) {
+if (&op == newForOp.getOperation()) {
 continue;
 }
-if (inst.isa<AffineTerminatorOp>()) {
+if (op.isa<AffineTerminatorOp>()) {
 continue;
 }
-auto *instClone = b.clone(inst, map);
+auto *instClone = b.clone(op, map);
 unsigned idx = 0;
-for (auto r : inst.getResults()) {
+for (auto r : op.getResults()) {
 // Since we do a forward pass over the body, we iteratively augment
 // the `map` with everything we clone.
 map.map(r, instClone->getResult(idx++));
@@ -567,8 +567,8 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
 auto scaledStep = originalStep * factor;
 forOp.setStep(scaledStep);

-auto *forInst = forOp.getOperation();
-FuncBuilder b(forInst->getBlock(), ++Block::iterator(forInst));
+auto *op = forOp.getOperation();
+FuncBuilder b(op->getBlock(), ++Block::iterator(op));

 // Lower-bound map creation.
 auto lbMap = forOp.getLowerBoundMap();
@@ -588,11 +588,11 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
 auto newForOp = b.create<AffineForOp>(t.getLoc(), lbOperands, lbMap,
 ubOperands, ubMap, originalStep);
 cloneLoopBodyInto(t, forOp.getInductionVar(), newForOp);
-// Remove all instructions from `t` except `newForOp`.
+// Remove all operations from `t` except `newForOp`.
 auto rit = ++newForOp.getOperation()->getReverseIterator();
 auto re = t.getBody()->rend();
-for (auto &inst : llvm::make_early_inc_range(llvm::make_range(rit, re))) {
-inst.erase();
+for (auto &op : llvm::make_early_inc_range(llvm::make_range(rit, re))) {
+op.erase();
 }
 innerLoops.push_back(newForOp);
 }
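For illustration, a sketch of how a pass might drive the unroll utility after the rename. `mlir::loopUnrollByFactor` and the `Function::walk`/`dyn_cast` idioms are taken from this diff; the walk-based driver, the factor of 4, and the include paths are assumptions.

```cpp
// Illustrative sketch only; include paths are assumed from this tree.
#include "mlir/AffineOps/AffineOps.h"
#include "mlir/IR/Function.h"
#include "mlir/Transforms/LoopUtils.h"

// Attempt to unroll every affine.for in the function by a fixed factor.
// A cleanup loop is cloned right after the original by the utility itself
// when the trip count is not a multiple of the factor (see the
// cleanupForInst hunk above).
static void unrollAllLoopsBy4(mlir::Function &fn) {
  fn.walk([](mlir::Operation *op) {
    if (auto forOp = op->dyn_cast<mlir::AffineForOp>())
      (void)mlir::loopUnrollByFactor(forOp, /*unrollFactor=*/4);
  });
}
```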
@@ -37,7 +37,7 @@ using namespace mlir;
 /// Return true if this operation dereferences one or more memref's.
 // Temporary utility: will be replaced when this is modeled through
 // side-effects/op traits. TODO(b/117228571)
-static bool isMemRefDereferencingOp(Instruction &op) {
+static bool isMemRefDereferencingOp(Operation &op) {
 if (op.isa<LoadOp>() || op.isa<StoreOp>() || op.isa<DmaStartOp>() ||
 op.isa<DmaWaitOp>())
 return true;
@@ -48,8 +48,8 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 ArrayRef<Value *> extraIndices,
 AffineMap indexRemap,
 ArrayRef<Value *> extraOperands,
-Instruction *domInstFilter,
-Instruction *postDomInstFilter) {
+Operation *domInstFilter,
+Operation *postDomInstFilter) {
 unsigned newMemRefRank = newMemRef->getType().cast<MemRefType>().getRank();
 (void)newMemRefRank; // unused in opt mode
 unsigned oldMemRefRank = oldMemRef->getType().cast<MemRefType>().getRank();
@@ -76,7 +76,7 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 llvm::make_unique<PostDominanceInfo>(postDomInstFilter->getFunction());

 // The ops where memref replacement succeeds are replaced with new ones.
-SmallVector<Instruction *, 8> opsToErase;
+SmallVector<Operation *, 8> opsToErase;

 // Walk all uses of old memref. Operation using the memref gets replaced.
 for (auto &use : llvm::make_early_inc_range(oldMemRef->getUses())) {
@@ -115,7 +115,7 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 };
 unsigned memRefOperandPos = getMemRefOperandPos();

-// Construct the new operation instruction using this memref.
+// Construct the new operation using this memref.
 OperationState state(opInst->getContext(), opInst->getLoc(),
 opInst->getName());
 state.setOperandListToResizable(opInst->hasResizableOperandsList());
@@ -192,9 +192,9 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 return true;
 }

-/// Given an operation instruction, inserts one or more single result affine
+/// Given an operation, inserts one or more single result affine
 /// apply operations, results of which are exclusively used by this operation
-/// instruction. The operands of these newly created affine apply ops are
+/// operation. The operands of these newly created affine apply ops are
 /// guaranteed to be loop iterators or terminal symbols of a function.
 ///
 /// Before
@@ -221,7 +221,7 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 /// uses besides this opInst; otherwise returns the list of affine.apply
 /// operations created in output argument `sliceOps`.
 void mlir::createAffineComputationSlice(
-Instruction *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
+Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
 // Collect all operands that are results of affine apply ops.
 SmallVector<Value *, 4> subOperands;
 subOperands.reserve(opInst->getNumOperands());
@@ -233,13 +233,13 @@ void mlir::createAffineComputationSlice(
 }

 // Gather sequence of AffineApplyOps reachable from 'subOperands'.
-SmallVector<Instruction *, 4> affineApplyOps;
+SmallVector<Operation *, 4> affineApplyOps;
 getReachableAffineApplyOps(subOperands, affineApplyOps);
 // Skip transforming if there are no affine maps to compose.
 if (affineApplyOps.empty())
 return;

-// Check if all uses of the affine apply op's lie only in this op inst, in
+// Check if all uses of the affine apply op's lie only in this op op, in
 // which case there would be nothing to do.
 bool localized = true;
 for (auto *op : affineApplyOps) {
@@ -291,7 +291,7 @@ void mlir::createAffineComputationSlice(
 }

 void mlir::remapFunctionAttrs(
-Instruction &op, const DenseMap<Attribute, FunctionAttr> &remappingTable) {
+Operation &op, const DenseMap<Attribute, FunctionAttr> &remappingTable) {
 for (auto attr : op.getAttrs()) {
 // Do the remapping, if we got the same thing back, then it must contain
 // functions that aren't getting remapped.
@@ -310,9 +310,8 @@ void mlir::remapFunctionAttrs(
 void mlir::remapFunctionAttrs(
 Function &fn, const DenseMap<Attribute, FunctionAttr> &remappingTable) {

-// Look at all instructions in a Function.
-fn.walk(
-[&](Instruction *inst) { remapFunctionAttrs(*inst, remappingTable); });
+// Look at all operations in a Function.
+fn.walk([&](Operation *op) { remapFunctionAttrs(*op, remappingTable); });
 }

 void mlir::remapFunctionAttrs(
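For illustration, a minimal sketch of a call to the utility whose signature appears in the `replaceAllMemRefUsesWith` hunks above, for the simple case where both memrefs have identical shape. Passing empty lists, a default-constructed `AffineMap` to mean "no index remapping", and null dominance filters is an assumption about the defaults, not something this diff shows; the helper itself is hypothetical.

```cpp
// Illustrative sketch only; include path assumed from this tree.
#include "mlir/Transforms/Utils.h"

// Redirect every dereferencing use of `oldMemRef` to `newMemRef`, with no
// extra indices, no index remapping and no dominance filtering. The boolean
// result reports whether the replacement succeeded.
static bool replaceBuffer(mlir::Value *oldMemRef, mlir::Value *newMemRef) {
  return mlir::replaceAllMemRefUsesWith(oldMemRef, newMemRef,
                                        /*extraIndices=*/{},
                                        /*indexRemap=*/mlir::AffineMap(),
                                        /*extraOperands=*/{},
                                        /*domInstFilter=*/nullptr,
                                        /*postDomInstFilter=*/nullptr);
}
```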
@@ -77,7 +77,7 @@ static llvm::cl::opt<bool> clTestNormalizeMaps(
 llvm::cl::desc(
 "Enable testing the normalization of AffineAffineApplyOp "
 "where each AffineAffineApplyOp in the composition is a single output "
-"instruction."),
+"operation."),
 llvm::cl::cat(clOptionsCategory));

 namespace {
@@ -104,16 +104,16 @@ void VectorizerTestPass::testVectorShapeRatio() {
 clTestVectorShapeRatio.end());
 auto subVectorType =
 VectorType::get(shape, FloatType::getF32(f->getContext()));
-// Only filter instructions that operate on a strict super-vector and have one
+// Only filter operations that operate on a strict super-vector and have one
 // return. This makes testing easier.
-auto filter = [subVectorType](Instruction &inst) {
+auto filter = [subVectorType](Operation &op) {
 assert(subVectorType.getElementType() ==
 FloatType::getF32(subVectorType.getContext()) &&
 "Only f32 supported for now");
-if (!matcher::operatesOnSuperVectors(inst, subVectorType)) {
+if (!matcher::operatesOnSuperVectors(op, subVectorType)) {
 return false;
 }
-if (inst.getNumResults() != 1) {
+if (op.getNumResults() != 1) {
 return false;
 }
 return true;
@@ -138,10 +138,10 @@ void VectorizerTestPass::testVectorShapeRatio() {
 }
 }

-static std::string toString(Instruction *inst) {
+static std::string toString(Operation *op) {
 std::string res;
 llvm::raw_string_ostream os(res);
-inst->print(os);
+op->print(os);
 return res;
 }

@@ -150,9 +150,9 @@ static NestedPattern patternTestSlicingOps() {
 constexpr auto kTestSlicingOpName = "slicing-test-op";
 using functional::map;
 using matcher::Op;
-// Match all OpInstructions with the kTestSlicingOpName name.
-auto filter = [](Instruction &inst) {
-return inst.getName().getStringRef() == kTestSlicingOpName;
+// Match all operations with the kTestSlicingOpName name.
+auto filter = [](Operation &op) {
+return op.getName().getStringRef() == kTestSlicingOpName;
 };
 return Op(filter);
 }
@@ -163,7 +163,7 @@ void VectorizerTestPass::testBackwardSlicing() {
 SmallVector<NestedMatch, 8> matches;
 patternTestSlicingOps().match(f, &matches);
 for (auto m : matches) {
-SetVector<Instruction *> backwardSlice;
+SetVector<Operation *> backwardSlice;
 getBackwardSlice(m.getMatchedOperation(), &backwardSlice);
 auto strs = map(toString, backwardSlice);
 outs() << "\nmatched: " << *m.getMatchedOperation()
@@ -179,7 +179,7 @@ void VectorizerTestPass::testForwardSlicing() {
 SmallVector<NestedMatch, 8> matches;
 patternTestSlicingOps().match(f, &matches);
 for (auto m : matches) {
-SetVector<Instruction *> forwardSlice;
+SetVector<Operation *> forwardSlice;
 getForwardSlice(m.getMatchedOperation(), &forwardSlice);
 auto strs = map(toString, forwardSlice);
 outs() << "\nmatched: " << *m.getMatchedOperation()
@@ -196,7 +196,7 @@ void VectorizerTestPass::testSlicing() {
 SmallVector<NestedMatch, 8> matches;
 patternTestSlicingOps().match(f, &matches);
 for (auto m : matches) {
-SetVector<Instruction *> staticSlice = getSlice(m.getMatchedOperation());
+SetVector<Operation *> staticSlice = getSlice(m.getMatchedOperation());
 auto strs = map(toString, staticSlice);
 outs() << "\nmatched: " << *m.getMatchedOperation() << " static slice: ";
 for (const auto &s : strs) {
@@ -205,8 +205,8 @@ void VectorizerTestPass::testSlicing() {
 }
 }

-static bool customOpWithAffineMapAttribute(Instruction &inst) {
-return inst.getName().getStringRef() ==
+static bool customOpWithAffineMapAttribute(Operation &op) {
+return op.getName().getStringRef() ==
 VectorizerTestPass::kTestAffineMapOpName;
 }

@@ -233,12 +233,10 @@ void VectorizerTestPass::testComposeMaps() {
 simplifyAffineMap(res).print(outs() << "\nComposed map: ");
 }

-static bool affineApplyOp(Instruction &inst) {
-return inst.isa<AffineApplyOp>();
-}
+static bool affineApplyOp(Operation &op) { return op.isa<AffineApplyOp>(); }

-static bool singleResultAffineApplyOpWithoutUses(Instruction &inst) {
-auto app = inst.dyn_cast<AffineApplyOp>();
+static bool singleResultAffineApplyOpWithoutUses(Operation &op) {
+auto app = op.dyn_cast<AffineApplyOp>();
 return app && app.use_empty();
 }

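For illustration, a sketch of a custom matcher filter after the rename, in the style of the `patternTestSlicingOps()` hunk above: every filter now takes an `Operation &`. The "my-test-op" name, the helper, and the header path are assumptions.

```cpp
// Illustrative sketch only; mirrors the filter idiom in the hunks above.
#include "mlir/Analysis/NestedMatcher.h"

// Build a pattern matching every operation named "my-test-op" (hypothetical).
static mlir::NestedPattern patternMyTestOps() {
  auto filter = [](mlir::Operation &op) {
    return op.getName().getStringRef() == "my-test-op";
  };
  return mlir::matcher::Op(filter);
}
```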
@@ -166,7 +166,7 @@ using namespace mlir;
 /// references along fastest varying dimensions and loops with recursive nested
 /// patterns capturing imperfectly-nested loop nests; the SLP vectorizer, on
 /// the other hand, performs flat pattern matching inside a single unrolled loop
-/// body and stitches together pieces of load and store instructions into full
+/// body and stitches together pieces of load and store operations into full
 /// 1-D vectors. We envision that the SLP vectorizer is a good way to capture
 /// innermost loop, control-flow dependent patterns that super-vectorization may
 /// not be able to capture easily. In other words, super-vectorization does not
@@ -662,13 +662,12 @@ namespace {

 struct VectorizationStrategy {
 SmallVector<int64_t, 8> vectorSizes;
-DenseMap<Instruction *, unsigned> loopToVectorDim;
+DenseMap<Operation *, unsigned> loopToVectorDim;
 };

 } // end anonymous namespace

-static void vectorizeLoopIfProfitable(Instruction *loop,
-unsigned depthInPattern,
+static void vectorizeLoopIfProfitable(Operation *loop, unsigned depthInPattern,
 unsigned patternDepth,
 VectorizationStrategy *strategy) {
 assert(patternDepth > depthInPattern &&
@@ -716,23 +715,23 @@ static LogicalResult analyzeProfitability(ArrayRef<NestedMatch> matches,
 namespace {

 struct VectorizationState {
-/// Adds an entry of pre/post vectorization instructions in the state.
-void registerReplacement(Instruction *key, Instruction *value);
+/// Adds an entry of pre/post vectorization operations in the state.
+void registerReplacement(Operation *key, Operation *value);
 /// When the current vectorization pattern is successful, this erases the
-/// instructions that were marked for erasure in the proper order and resets
+/// operations that were marked for erasure in the proper order and resets
 /// the internal state for the next pattern.
 void finishVectorizationPattern();

-// In-order tracking of original Instruction that have been vectorized.
+// In-order tracking of original Operation that have been vectorized.
 // Erase in reverse order.
-SmallVector<Instruction *, 16> toErase;
-// Set of Instruction that have been vectorized (the values in the
+SmallVector<Operation *, 16> toErase;
+// Set of Operation that have been vectorized (the values in the
 // vectorizationMap for hashed access). The vectorizedSet is used in
-// particular to filter the instructions that have already been vectorized by
+// particular to filter the operations that have already been vectorized by
 // this pattern, when iterating over nested loops in this pattern.
-DenseSet<Instruction *> vectorizedSet;
-// Map of old scalar Instruction to new vectorized Instruction.
-DenseMap<Instruction *, Instruction *> vectorizationMap;
+DenseSet<Operation *> vectorizedSet;
+// Map of old scalar Operation to new vectorized Operation.
+DenseMap<Operation *, Operation *> vectorizationMap;
 // Map of old scalar Value to new vectorized Value.
 DenseMap<Value *, Value *> replacementMap;
 // The strategy drives which loop to vectorize by which amount.
@@ -742,17 +741,16 @@ struct VectorizationState {
 // operations that have been vectorized. They can be retrieved from
 // `vectorizationMap` but it is convenient to keep track of them in a separate
 // data structure.
-DenseSet<Instruction *> roots;
-// Terminal instructions for the worklist in the vectorizeNonTerminals
+DenseSet<Operation *> roots;
+// Terminal operations for the worklist in the vectorizeNonTerminals
 // function. They consist of the subset of store operations that have been
 // vectorized. They can be retrieved from `vectorizationMap` but it is
 // convenient to keep track of them in a separate data structure. Since they
 // do not necessarily belong to use-def chains starting from loads (e.g
 // storing a constant), we need to handle them in a post-pass.
-DenseSet<Instruction *> terminals;
-// Checks that the type of `inst` is StoreOp and adds it to the terminals
-// set.
-void registerTerminal(Instruction *inst);
+DenseSet<Operation *> terminals;
+// Checks that the type of `op` is StoreOp and adds it to the terminals set.
+void registerTerminal(Operation *op);

 private:
 void registerReplacement(Value *key, Value *value);
@@ -760,8 +758,7 @@ private:

 } // end namespace

-void VectorizationState::registerReplacement(Instruction *key,
-Instruction *value) {
+void VectorizationState::registerReplacement(Operation *key, Operation *value) {
 LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ commit vectorized op: ");
 LLVM_DEBUG(key->print(dbgs()));
 LLVM_DEBUG(dbgs() << " into ");
@@ -780,19 +777,19 @@ void VectorizationState::registerReplacement(Instruction *key,
 }
 }

-void VectorizationState::registerTerminal(Instruction *inst) {
-assert(inst->isa<StoreOp>() && "terminal must be a StoreOp");
-assert(terminals.count(inst) == 0 &&
+void VectorizationState::registerTerminal(Operation *op) {
+assert(op->isa<StoreOp>() && "terminal must be a StoreOp");
+assert(terminals.count(op) == 0 &&
 "terminal was already inserted previously");
-terminals.insert(inst);
+terminals.insert(op);
 }

 void VectorizationState::finishVectorizationPattern() {
 while (!toErase.empty()) {
-auto *inst = toErase.pop_back_val();
+auto *op = toErase.pop_back_val();
 LLVM_DEBUG(dbgs() << "\n[early-vect] finishVectorizationPattern erase: ");
-LLVM_DEBUG(inst->print(dbgs()));
-inst->erase();
+LLVM_DEBUG(op->print(dbgs()));
+op->erase();
 }
 }

@@ -857,13 +854,13 @@ static LogicalResult vectorizeAffineForOp(AffineForOp loop, int64_t step,
 using namespace functional;
 loop.setStep(step);

-FilterFunctionType notVectorizedThisPattern = [state](Instruction &inst) {
-if (!matcher::isLoadOrStore(inst)) {
+FilterFunctionType notVectorizedThisPattern = [state](Operation &op) {
+if (!matcher::isLoadOrStore(op)) {
 return false;
 }
-return state->vectorizationMap.count(&inst) == 0 &&
-state->vectorizedSet.count(&inst) == 0 &&
-state->roots.count(&inst) == 0 && state->terminals.count(&inst) == 0;
+return state->vectorizationMap.count(&op) == 0 &&
+state->vectorizedSet.count(&op) == 0 &&
+state->roots.count(&op) == 0 && state->terminals.count(&op) == 0;
 };
 auto loadAndStores = matcher::Op(notVectorizedThisPattern);
 SmallVector<NestedMatch, 8> loadAndStoresMatches;
@@ -891,8 +888,8 @@ static LogicalResult vectorizeAffineForOp(AffineForOp loop, int64_t step,
 /// we can build a cost model and a search procedure.
 static FilterFunctionType
 isVectorizableLoopPtrFactory(unsigned fastestVaryingMemRefDimension) {
-return [fastestVaryingMemRefDimension](Instruction &forInst) {
-auto loop = forInst.cast<AffineForOp>();
+return [fastestVaryingMemRefDimension](Operation &forOp) {
+auto loop = forOp.cast<AffineForOp>();
 return isVectorizableLoopAlongFastestVaryingMemRefDim(
 loop, fastestVaryingMemRefDimension);
 };
@@ -943,14 +940,13 @@ vectorizeLoopsAndLoadsRecursively(NestedMatch oneMatch,
 /// element type.
 /// If `type` is not a valid vector type or if the scalar constant is not a
 /// valid vector element type, returns nullptr.
-static Value *vectorizeConstant(Instruction *inst, ConstantOp constant,
-Type type) {
+static Value *vectorizeConstant(Operation *op, ConstantOp constant, Type type) {
 if (!type || !type.isa<VectorType>() ||
 !VectorType::isValidElementType(constant.getType())) {
 return nullptr;
 }
-FuncBuilder b(inst);
-Location loc = inst->getLoc();
+FuncBuilder b(op);
+Location loc = op->getLoc();
 auto vectorType = type.cast<VectorType>();
 auto attr = SplatElementsAttr::get(vectorType, constant.getValue());
 auto *constantOpInst = constant.getOperation();
@@ -962,10 +958,10 @@ static Value *vectorizeConstant(Instruction *inst, ConstantOp constant,
 return b.createOperation(state)->getResult(0);
 }

-/// Tries to vectorize a given operand `op` of Instruction `inst` during
+/// Tries to vectorize a given operand `op` of Operation `op` during
 /// def-chain propagation or during terminal vectorization, by applying the
 /// following logic:
-/// 1. if the defining instruction is part of the vectorizedSet (i.e. vectorized
+/// 1. if the defining operation is part of the vectorizedSet (i.e. vectorized
 /// useby -def propagation), `op` is already in the proper vector form;
 /// 2. otherwise, the `op` may be in some other vector form that fails to
 /// vectorize atm (i.e. broadcasting required), returns nullptr to indicate
@@ -983,7 +979,7 @@ static Value *vectorizeConstant(Instruction *inst, ConstantOp constant,
 /// vectorization is possible with the above logic. Returns nullptr otherwise.
 ///
 /// TODO(ntv): handle more complex cases.
-static Value *vectorizeOperand(Value *operand, Instruction *inst,
+static Value *vectorizeOperand(Value *operand, Operation *op,
 VectorizationState *state) {
 LLVM_DEBUG(dbgs() << "\n[early-vect]vectorize operand: ");
 LLVM_DEBUG(operand->print(dbgs()));
@@ -1011,7 +1007,7 @@ static Value *vectorizeOperand(Value *operand, Instruction *inst,
 // 3. vectorize constant.
 if (auto constant = operand->getDefiningOp()->dyn_cast<ConstantOp>()) {
 return vectorizeConstant(
-inst, constant,
+op, constant,
 VectorType::get(state->strategy->vectorSizes, operand->getType()));
 }
 // 4. currently non-vectorizable.
@@ -1020,7 +1016,7 @@ static Value *vectorizeOperand(Value *operand, Instruction *inst,
 return nullptr;
 };

-/// Encodes Instruction-specific behavior for vectorization. In general we
+/// Encodes Operation-specific behavior for vectorization. In general we
 /// assume that all operands of an op must be vectorized but this is not always
 /// true. In the future, it would be nice to have a trait that describes how a
 /// particular operation vectorizes. For now we implement the case distinction
@@ -1029,7 +1025,7 @@ static Value *vectorizeOperand(Value *operand, Instruction *inst,
 /// TODO(ntv): consider adding a trait to Op to describe how it gets vectorized.
 /// Maybe some Ops are not vectorizable or require some tricky logic, we cannot
 /// do one-off logic here; ideally it would be TableGen'd.
-static Instruction *vectorizeOneInstruction(Instruction *opInst,
+static Operation *vectorizeOneOperation(Operation *opInst,
 VectorizationState *state) {
 // Sanity checks.
 assert(!opInst->isa<LoadOp>() &&
@@ -1079,9 +1075,8 @@ static Instruction *vectorizeOneInstruction(Instruction *opInst,
 // Create a clone of the op with the proper operands and return types.
 // TODO(ntv): The following assumes there is always an op with a fixed
 // name that works both in scalar mode and vector mode.
-// TODO(ntv): Is it worth considering an Instruction.clone operation
-// which changes the type so we can promote an Instruction with less
-// boilerplate?
+// TODO(ntv): Is it worth considering an Operation.clone operation which
+// changes the type so we can promote an Operation with less boilerplate?
 FuncBuilder b(opInst);
 OperationState newOp(b.getContext(), opInst->getLoc(),
 opInst->getName().getStringRef(), vectorOperands,
@@ -1100,31 +1095,31 @@ static Instruction *vectorizeOneInstruction(Instruction *opInst,
 /// replacementMap. If any such replacement is missing, vectorization fails.
 static LogicalResult vectorizeNonTerminals(VectorizationState *state) {
 // 1. create initial worklist with the uses of the roots.
-SetVector<Instruction *> worklist;
+SetVector<Operation *> worklist;
 // Note: state->roots have already been vectorized and must not be vectorized
-// again. This fits `getForwardSlice` which does not insert `inst` in the
+// again. This fits `getForwardSlice` which does not insert `op` in the
 // result.
 // Note: we have to exclude terminals because some of their defs may not be
 // nested under the vectorization pattern (e.g. constants defined in an
 // encompassing scope).
 // TODO(ntv): Use a backward slice for terminals, avoid special casing and
 // merge implementations.
-for (auto *inst : state->roots) {
-getForwardSlice(inst, &worklist, [state](Instruction *inst) {
-return state->terminals.count(inst) == 0; // propagate if not terminal
+for (auto *op : state->roots) {
+getForwardSlice(op, &worklist, [state](Operation *op) {
+return state->terminals.count(op) == 0; // propagate if not terminal
 });
 }
 // We merged multiple slices, topological order may not hold anymore.
 worklist = topologicalSort(worklist);

 for (unsigned i = 0; i < worklist.size(); ++i) {
-auto *inst = worklist[i];
+auto *op = worklist[i];
 LLVM_DEBUG(dbgs() << "\n[early-vect] vectorize use: ");
-LLVM_DEBUG(inst->print(dbgs()));
+LLVM_DEBUG(op->print(dbgs()));

-// Create vector form of the instruction.
-// Insert it just before inst, on success register inst as replaced.
-auto *vectorizedInst = vectorizeOneInstruction(inst, state);
+// Create vector form of the operation.
+// Insert it just before op, on success register op as replaced.
+auto *vectorizedInst = vectorizeOneOperation(op, state);
 if (!vectorizedInst) {
 return failure();
 }
@@ -1133,7 +1128,7 @@ static LogicalResult vectorizeNonTerminals(VectorizationState *state) {
 // Note that we cannot just call replaceAllUsesWith because it may
 // result in ops with mixed types, for ops whose operands have not all
 // yet been vectorized. This would be invalid IR.
-state->registerReplacement(inst, vectorizedInst);
+state->registerReplacement(op, vectorizedInst);
 }
 return success();
 }
@@ -1193,9 +1188,8 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
 return guard.failure();
 }

-// 2. Vectorize operations reached by use-def chains from root
-// except the terminals (store instructions) that need to be
-// post-processed separately.
+// 2. Vectorize operations reached by use-def chains from root except the
+// terminals (store operations) that need to be post-processed separately.
 // TODO(ntv): add more as we expand.
 if (failed(vectorizeNonTerminals(&state))) {
 LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ failed vectorizeNonTerminals");
@@ -1208,8 +1202,8 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
 // encompassing scope).
 // TODO(ntv): Use a backward slice for terminals, avoid special casing and
 // merge implementations.
-for (auto *inst : state.terminals) {
-if (!vectorizeOneInstruction(inst, &state)) { // nullptr == failure
+for (auto *op : state.terminals) {
+if (!vectorizeOneOperation(op, &state)) { // nullptr == failure
 LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ failed to vectorize terminals");
 return guard.failure();
 }
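For illustration, a standalone sketch of the worklist construction described in the `vectorizeNonTerminals` hunks above: seed with the forward slices of the roots, stop propagation at the terminals, then restore topological order on the merged result. `getForwardSlice` and `topologicalSort` are the utilities used in the diff; the helper, its parameters, and the include paths are assumptions, with `roots` and `terminals` standing in for the members of `VectorizationState`.

```cpp
// Illustrative sketch only; follows the idiom in the hunks above.
#include "mlir/Analysis/SliceAnalysis.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"

// Collect, in topological order, everything reachable forward from `roots`
// without propagating past `terminals`.
static llvm::SetVector<mlir::Operation *>
collectForwardSlices(const llvm::DenseSet<mlir::Operation *> &roots,
                     const llvm::DenseSet<mlir::Operation *> &terminals) {
  llvm::SetVector<mlir::Operation *> worklist;
  for (mlir::Operation *root : roots)
    mlir::getForwardSlice(root, &worklist, [&](mlir::Operation *op) {
      return terminals.count(op) == 0; // do not walk through terminals
    });
  // Merging several slices may break topological order; restore it.
  return mlir::topologicalSort(worklist);
}
```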