From 99b87c9707b389183de33961f81d4b2730b033c8 Mon Sep 17 00:00:00 2001 From: River Riddle Date: Wed, 27 Mar 2019 14:02:02 -0700 Subject: [PATCH] Replace usages of Instruction with Operation in the Transforms/ directory. PiperOrigin-RevId: 240636130 --- .../mlir/Transforms/DialectConversion.h | 7 +- mlir/include/mlir/Transforms/LoopUtils.h | 8 +- .../mlir/Transforms/MLPatternLoweringPass.h | 16 +- mlir/include/mlir/Transforms/Passes.h | 2 +- mlir/include/mlir/Transforms/Utils.h | 20 +- mlir/lib/Transforms/CSE.cpp | 24 +- mlir/lib/Transforms/ConstantFold.cpp | 12 +- mlir/lib/Transforms/DialectConversion.cpp | 32 +-- mlir/lib/Transforms/DmaGeneration.cpp | 44 ++- mlir/lib/Transforms/LoopFusion.cpp | 269 +++++++++--------- mlir/lib/Transforms/LoopTiling.cpp | 6 +- mlir/lib/Transforms/LoopUnroll.cpp | 6 +- mlir/lib/Transforms/LoopUnrollAndJam.cpp | 16 +- mlir/lib/Transforms/LowerAffine.cpp | 56 ++-- mlir/lib/Transforms/LowerVectorTransfers.cpp | 4 +- mlir/lib/Transforms/MaterializeVectors.cpp | 102 ++++--- mlir/lib/Transforms/MemRefDataFlowOpt.cpp | 22 +- mlir/lib/Transforms/PipelineDataTransfer.cpp | 76 ++--- .../Transforms/SimplifyAffineStructures.cpp | 10 +- mlir/lib/Transforms/StripDebugInfo.cpp | 6 +- .../Utils/GreedyPatternRewriteDriver.cpp | 28 +- mlir/lib/Transforms/Utils/LoopUtils.cpp | 104 +++---- mlir/lib/Transforms/Utils/Utils.cpp | 27 +- .../Vectorization/VectorizerTestPass.cpp | 38 ++- mlir/lib/Transforms/Vectorize.cpp | 126 ++++---- 25 files changed, 522 insertions(+), 539 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index fe3a46d6050c..27af342079b5 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -33,7 +33,6 @@ class Block; class FuncBuilder; class MLIRContext; class Operation; -using Instruction = Operation; class Type; class Value; @@ -43,7 +42,7 @@ class FunctionConversion; } /// Base class for the dialect op conversion patterns. Specific conversions -/// must derive this class and implement `PatternMatch match(Instruction *)` +/// must derive this class and implement `PatternMatch match(Operation *)` /// defined in `Pattern` and at least one of `rewrite` and `rewriteTerminator`. // // TODO(zinenko): this should eventually converge with RewritePattern. So far, @@ -67,7 +66,7 @@ public: /// DialectOpConversion ever needs to replace an operation that does not have /// successors. This function should not fail. If some specific cases of the /// operation are not supported, these cases should not be matched. - virtual SmallVector rewrite(Instruction *op, + virtual SmallVector rewrite(Operation *op, ArrayRef operands, FuncBuilder &rewriter) const { llvm_unreachable("unimplemented rewrite, did you mean rewriteTerminator?"); @@ -85,7 +84,7 @@ public: /// successors. This function should not fail the pass. If some specific /// cases of the operation are not supported, these cases should not be /// matched. 
- virtual void rewriteTerminator(Instruction *op, + virtual void rewriteTerminator(Operation *op, ArrayRef properOperands, ArrayRef destinations, ArrayRef> operands, diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h index 1d5203e77d5a..f1e7b503769f 100644 --- a/mlir/include/mlir/Transforms/LoopUtils.h +++ b/mlir/include/mlir/Transforms/LoopUtils.h @@ -34,10 +34,10 @@ class Function; class FuncBuilder; class Value; -/// Unrolls this for instruction completely if the trip count is known to be +/// Unrolls this for operation completely if the trip count is known to be /// constant. Returns failure otherwise. LogicalResult loopUnrollFull(AffineForOp forOp); -/// Unrolls this for instruction by the specified unroll factor. Returns failure +/// Unrolls this for operation by the specified unroll factor. Returns failure /// if the loop cannot be unrolled either due to restrictions or due to invalid /// unroll factors. LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor); @@ -73,8 +73,8 @@ void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor, SmallVectorImpl *operands, FuncBuilder *builder); -/// Skew the instructions in the body of a 'affine.for' instruction with the -/// specified instruction-wise shifts. The shifts are with respect to the +/// Skew the operations in the body of a 'affine.for' operation with the +/// specified operation-wise shifts. The shifts are with respect to the /// original execution order, and are multiplied by the loop 'step' before being /// applied. LLVM_NODISCARD diff --git a/mlir/include/mlir/Transforms/MLPatternLoweringPass.h b/mlir/include/mlir/Transforms/MLPatternLoweringPass.h index c9ed3a38a654..c43b551c49ae 100644 --- a/mlir/include/mlir/Transforms/MLPatternLoweringPass.h +++ b/mlir/include/mlir/Transforms/MLPatternLoweringPass.h @@ -37,7 +37,7 @@ public: FuncBuilder *getBuilder() { return builder; } - Instruction *createOperation(const OperationState &state) override { + Operation *createOperation(const OperationState &state) override { auto *result = builder->createOperation(state); return result; } @@ -66,7 +66,7 @@ public: /// must override). It will be passed the function-wise state, common to all /// matches, and the state returned by the `match` call, if any. The subclass /// must use `rewriter` to modify the function. 
- virtual void rewriteOpInst(Instruction *op, + virtual void rewriteOpInst(Operation *op, MLFuncGlobalLoweringState *funcWiseState, std::unique_ptr opState, MLFuncLoweringRewriter *rewriter) const = 0; @@ -123,14 +123,14 @@ void applyMLPatternsGreedily( FuncBuilder builder(f); MLFuncLoweringRewriter rewriter(&builder); - llvm::SmallVector ops; - f->walk([&ops](Instruction *inst) { ops.push_back(inst); }); + llvm::SmallVector ops; + f->walk([&ops](Operation *op) { ops.push_back(op); }); - for (Instruction *inst : ops) { + for (Operation *op : ops) { for (const auto &pattern : patterns) { - builder.setInsertionPoint(inst); - if (auto matchResult = pattern->match(inst)) { - pattern->rewriteOpInst(inst, funcWiseState, std::move(*matchResult), + builder.setInsertionPoint(op); + if (auto matchResult = pattern->match(op)) { + pattern->rewriteOpInst(op, funcWiseState, std::move(*matchResult), &rewriter); break; } diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h index 3a75a2619f42..634f690c4516 100644 --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -82,7 +82,7 @@ FunctionPassBase *createLoopFusionPass(unsigned fastMemorySpace = 0, /// memory hierarchy. FunctionPassBase *createPipelineDataTransferPass(); -/// Lowers affine control flow instructions (ForStmt, IfStmt and AffineApplyOp) +/// Lowers affine control flow operations (ForStmt, IfStmt and AffineApplyOp) /// to equivalent lower-level constructs (flow of basic blocks and arithmetic /// primitives). FunctionPassBase *createLowerAffinePass(); diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h index 97bf16c2c54d..db25ed1f26d8 100644 --- a/mlir/include/mlir/Transforms/Utils.h +++ b/mlir/include/mlir/Transforms/Utils.h @@ -73,8 +73,8 @@ bool replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, ArrayRef extraIndices = {}, AffineMap indexRemap = AffineMap(), ArrayRef extraOperands = {}, - Instruction *domInstFilter = nullptr, - Instruction *postDomInstFilter = nullptr); + Operation *domInstFilter = nullptr, + Operation *postDomInstFilter = nullptr); /// Creates and inserts into 'builder' a new AffineApplyOp, with the number of /// its results equal to the number of operands, as a composition @@ -83,13 +83,13 @@ bool replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, /// these will also be collected into a single (multi-result) affine apply op. /// The final results of the composed AffineApplyOp are returned in output /// parameter 'results'. Returns the affine apply op created. -Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc, - ArrayRef operands, - ArrayRef affineApplyOps, - SmallVectorImpl *results); +Operation *createComposedAffineApplyOp(FuncBuilder *builder, Location loc, + ArrayRef operands, + ArrayRef affineApplyOps, + SmallVectorImpl *results); -/// Given an instruction, inserts one or more single result affine apply -/// operations, results of which are exclusively used by this instruction. +/// Given an operation, inserts one or more single result affine apply +/// operations, results of which are exclusively used by this operation. /// The operands of these newly created affine apply ops are /// guaranteed to be loop iterators or terminal symbols of a function. /// @@ -117,13 +117,13 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc, /// (i.e., there was no affine computation slice to create). /// 2. 
If all the affine.apply op's supplying operands to this opInst did not /// have any uses other than those in this opInst. -void createAffineComputationSlice(Instruction *opInst, +void createAffineComputationSlice(Operation *opInst, SmallVectorImpl *sliceOps); /// Replaces (potentially nested) function attributes in the operation "op" /// with those specified in "remappingTable". void remapFunctionAttrs( - Instruction &op, const DenseMap &remappingTable); + Operation &op, const DenseMap &remappingTable); /// Replaces (potentially nested) function attributes all operations of the /// Function "fn" with those specified in "remappingTable". diff --git a/mlir/lib/Transforms/CSE.cpp b/mlir/lib/Transforms/CSE.cpp index c3916a07c182..f90e12db7723 100644 --- a/mlir/lib/Transforms/CSE.cpp +++ b/mlir/lib/Transforms/CSE.cpp @@ -38,11 +38,11 @@ using namespace mlir; namespace { // TODO(riverriddle) Handle commutative operations. -struct SimpleOperationInfo : public llvm::DenseMapInfo { - static unsigned getHashValue(const Instruction *opC) { - auto *op = const_cast(opC); +struct SimpleOperationInfo : public llvm::DenseMapInfo { + static unsigned getHashValue(const Operation *opC) { + auto *op = const_cast(opC); // Hash the operations based upon their: - // - Instruction Name + // - Operation Name // - Attributes // - Result Types // - Operands @@ -51,9 +51,9 @@ struct SimpleOperationInfo : public llvm::DenseMapInfo { hash_combine_range(op->result_type_begin(), op->result_type_end()), hash_combine_range(op->operand_begin(), op->operand_end())); } - static bool isEqual(const Instruction *lhsC, const Instruction *rhsC) { - auto *lhs = const_cast(lhsC); - auto *rhs = const_cast(rhsC); + static bool isEqual(const Operation *lhsC, const Operation *rhsC) { + auto *lhs = const_cast(lhsC); + auto *rhs = const_cast(rhsC); if (lhs == rhs) return true; if (lhs == getTombstoneKey() || lhs == getEmptyKey() || @@ -90,8 +90,8 @@ struct CSE : public FunctionPass { /// Shared implementation of operation elimination and scoped map definitions. using AllocatorTy = llvm::RecyclingAllocator< llvm::BumpPtrAllocator, - llvm::ScopedHashTableVal>; - using ScopedMapTy = llvm::ScopedHashTable>; + using ScopedMapTy = llvm::ScopedHashTable; /// Represents a single entry in the depth first traversal of a CFG. @@ -112,7 +112,7 @@ struct CSE : public FunctionPass { /// Attempt to eliminate a redundant operation. Returns true if the operation /// was marked for removal, false otherwise. - bool simplifyOperation(Instruction *op); + bool simplifyOperation(Operation *op); void simplifyBlock(DominanceInfo &domInfo, Block *bb); void simplifyRegion(DominanceInfo &domInfo, Region ®ion); @@ -124,12 +124,12 @@ private: ScopedMapTy knownValues; /// Operations marked as dead and to be erased. - std::vector opsToErase; + std::vector opsToErase; }; } // end anonymous namespace /// Attempt to eliminate a redundant operation. -bool CSE::simplifyOperation(Instruction *op) { +bool CSE::simplifyOperation(Operation *op) { // Don't simplify operations with nested blocks. We don't currently model // equality comparisons correctly among other things. It is also unclear // whether we would want to CSE such operations. diff --git a/mlir/lib/Transforms/ConstantFold.cpp b/mlir/lib/Transforms/ConstantFold.cpp index 4c4c8cc40198..364c3dcd6adc 100644 --- a/mlir/lib/Transforms/ConstantFold.cpp +++ b/mlir/lib/Transforms/ConstantFold.cpp @@ -31,9 +31,9 @@ struct ConstantFold : public FunctionPass { // All constants in the function post folding. 
SmallVector existingConstants; // Operations that were folded and that need to be erased. - std::vector opInstsToErase; + std::vector opInstsToErase; - void foldInstruction(Instruction *op); + void foldOperation(Operation *op); void runOnFunction() override; }; } // end anonymous namespace @@ -41,7 +41,7 @@ struct ConstantFold : public FunctionPass { /// Attempt to fold the specified operation, updating the IR to match. If /// constants are found, we keep track of them in the existingConstants list. /// -void ConstantFold::foldInstruction(Instruction *op) { +void ConstantFold::foldOperation(Operation *op) { // If this operation is already a constant, just remember it for cleanup // later, and don't try to fold it. if (auto constant = op->dyn_cast()) { @@ -97,15 +97,15 @@ void ConstantFold::runOnFunction() { existingConstants.clear(); opInstsToErase.clear(); - getFunction().walk([&](Instruction *inst) { foldInstruction(inst); }); + getFunction().walk([&](Operation *op) { foldOperation(op); }); // At this point, these operations are dead, remove them. // TODO: This is assuming that all constant foldable operations have no // side effects. When we have side effect modeling, we should verify that // the operation is effect-free before we remove it. Until then this is // close enough. - for (auto *inst : opInstsToErase) { - inst->erase(); + for (auto *op : opInstsToErase) { + op->erase(); } // By the time we are done, we may have simplified a bunch of code, leaving diff --git a/mlir/lib/Transforms/DialectConversion.cpp b/mlir/lib/Transforms/DialectConversion.cpp index a659b2e480b4..2d16f23d41f5 100644 --- a/mlir/lib/Transforms/DialectConversion.cpp +++ b/mlir/lib/Transforms/DialectConversion.cpp @@ -50,7 +50,7 @@ private: // Utility that looks up a list of value in the value remapping table. Returns // an empty vector if one of the values is not mapped yet. SmallVector lookupValues( - const llvm::iterator_range &operands); + const llvm::iterator_range &operands); // Converts the given function to the dialect using hooks defined in // `dialectConversion`. Returns the converted function or `nullptr` on error. @@ -61,16 +61,16 @@ private: // passes them to `converter->rewriteTerminator` function defined in the // pattern, together with `builder`. LogicalResult convertOpWithSuccessors(DialectOpConversion *converter, - Instruction *op, FuncBuilder &builder); + Operation *op, FuncBuilder &builder); // Converts an operation without successors. Extracts the converted operands // from `valueRemapping` and passes them to the `converter->rewrite` function // defined in the pattern, together with `builder`. - LogicalResult convertOp(DialectOpConversion *converter, Instruction *op, + LogicalResult convertOp(DialectOpConversion *converter, Operation *op, FuncBuilder &builder); - // Converts a block by traversing its instructions sequentially, looking for - // the first pattern match and dispatching the instruction conversion to + // Converts a block by traversing its operations sequentially, looking for + // the first pattern match and dispatching the operation conversion to // either `convertOp` or `convertOpWithSuccessors` depending on the presence // of successors. If there is no match, clones the operation. 
// @@ -101,7 +101,7 @@ private: } // end namespace mlir SmallVector impl::FunctionConversion::lookupValues( - const llvm::iterator_range &operands) { + const llvm::iterator_range &operands) { SmallVector remapped; remapped.reserve(llvm::size(operands)); for (Value *operand : operands) { @@ -114,7 +114,7 @@ SmallVector impl::FunctionConversion::lookupValues( } LogicalResult impl::FunctionConversion::convertOpWithSuccessors( - DialectOpConversion *converter, Instruction *op, FuncBuilder &builder) { + DialectOpConversion *converter, Operation *op, FuncBuilder &builder) { SmallVector destinations; destinations.reserve(op->getNumSuccessors()); SmallVector operands = lookupValues(op->getOperands()); @@ -146,7 +146,7 @@ LogicalResult impl::FunctionConversion::convertOpWithSuccessors( LogicalResult impl::FunctionConversion::convertOp(DialectOpConversion *converter, - Instruction *op, FuncBuilder &builder) { + Operation *op, FuncBuilder &builder) { auto operands = lookupValues(op->getOperands()); assert((!operands.empty() || op->getNumOperands() == 0) && "converting op before ops defining its operands"); @@ -170,22 +170,22 @@ impl::FunctionConversion::convertBlock(Block *block, FuncBuilder &builder, builder.setInsertionPointToStart(mapping.lookupOrNull(block)); // Iterate over ops and convert them. - for (Instruction &inst : *block) { - if (inst.getNumRegions() != 0) { - inst.emitError("unsupported region instruction"); + for (Operation &op : *block) { + if (op.getNumRegions() != 0) { + op.emitError("unsupported region operation"); return failure(); } // Find the first matching conversion and apply it. bool converted = false; for (auto *conversion : conversions) { - if (!conversion->match(&inst)) + if (!conversion->match(&op)) continue; - if (inst.getNumSuccessors() != 0) { - if (failed(convertOpWithSuccessors(conversion, &inst, builder))) + if (op.getNumSuccessors() != 0) { + if (failed(convertOpWithSuccessors(conversion, &op, builder))) return failure(); - } else if (failed(convertOp(conversion, &inst, builder))) { + } else if (failed(convertOp(conversion, &op, builder))) { return failure(); } converted = true; @@ -193,7 +193,7 @@ impl::FunctionConversion::convertBlock(Block *block, FuncBuilder &builder, } // If there is no conversion provided for the op, clone the op as is. if (!converted) - builder.clone(inst, mapping); + builder.clone(op, mapping); } // Recurse to children unless they have been already visited. diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp index e04ae3d45bb1..83ba858447bd 100644 --- a/mlir/lib/Transforms/DmaGeneration.cpp +++ b/mlir/lib/Transforms/DmaGeneration.cpp @@ -170,7 +170,7 @@ static void getMultiLevelStrides(const MemRefRegion ®ion, /// dynamic shaped memref's for now. `numParamLoopIVs` is the number of /// enclosing loop IVs of opInst (starting from the outermost) that the region /// is parametric on. 
-static bool getFullMemRefAsRegion(Instruction *opInst, unsigned numParamLoopIVs, +static bool getFullMemRefAsRegion(Operation *opInst, unsigned numParamLoopIVs, MemRefRegion *region) { unsigned rank; if (auto loadOp = opInst->dyn_cast()) { @@ -212,11 +212,11 @@ static bool getFullMemRefAsRegion(Instruction *opInst, unsigned numParamLoopIVs, } static void emitNoteForBlock(Block &block, const Twine &message) { - auto *inst = block.getContainingOp(); - if (!inst) { + auto *op = block.getContainingOp(); + if (!op) { block.getFunction()->emitNote(message); } else { - inst->emitNote(message); + op->emitNote(message); } } @@ -350,7 +350,7 @@ bool DmaGeneration::generateDma(const MemRefRegion ®ion, Block *block, fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace); // Create the fast memory space buffer just before the 'affine.for' - // instruction. + // operation. fastMemRef = prologue.create(loc, fastMemRefType).getResult(); // Record it. fastBufferMap[memref] = fastMemRef; @@ -391,7 +391,7 @@ bool DmaGeneration::generateDma(const MemRefRegion ®ion, Block *block, top.create(loc, strideInfos[0].numEltPerStride); } - // Record the last instruction just before the point where we insert the + // Record the last operation just before the point where we insert the // outgoing DMAs. We later do the memref replacement later only in [begin, // postDomFilter] so that the original memref's in the DMA ops themselves // don't get replaced. @@ -464,7 +464,7 @@ bool DmaGeneration::generateDma(const MemRefRegion ®ion, Block *block, } /// Generate DMAs for this block. The block is partitioned into separate -/// `regions`; each region is either a sequence of one or more instructions +/// `regions`; each region is either a sequence of one or more operations /// starting and ending with a load or store op, or just a loop (which could /// have other loops nested within). Returns false on an error, true otherwise. bool DmaGeneration::runOnBlock(Block *block) { @@ -472,20 +472,19 @@ bool DmaGeneration::runOnBlock(Block *block) { return true; // Every loop in the block starts and ends a region. A contiguous sequence of - // operation instructions starting and ending with a load/store op is also + // operations starting and ending with a load/store op is also // identified as a region. Straightline code (contiguous chunks of operation - // instructions) are always assumed to not exhaust memory. As a result, this + // operations) are always assumed to not exhaust memory. As a result, this // approach is conservative in some cases at the moment, we do a check later // and report an error with location info. - // TODO(bondhugula): An 'affine.if' instruction is being treated similar to an - // operation instruction. 'affine.if''s could have 'affine.for's in them; + // TODO(bondhugula): An 'affine.if' operation is being treated similar to an + // operation. 'affine.if''s could have 'affine.for's in them; // treat them separately. // Get to the first load, store, or for op. auto curBegin = - std::find_if(block->begin(), block->end(), [&](Instruction &inst) { - return inst.isa() || inst.isa() || - inst.isa(); + std::find_if(block->begin(), block->end(), [&](Operation &op) { + return op.isa() || op.isa() || op.isa(); }); for (auto it = curBegin; it != block->end(); ++it) { @@ -513,7 +512,7 @@ bool DmaGeneration::runOnBlock(Block *block) { runOnBlock(/*begin=*/curBegin, /*end=*/it); // Recurse onto the body of this loop. 
runOnBlock(forOp.getBody()); - // The next region starts right after the 'affine.for' instruction. + // The next region starts right after the 'affine.for' operation. curBegin = std::next(it); } else { // We have enough capacity, i.e., DMAs will be computed for the portion @@ -583,10 +582,10 @@ findHighestBlockForPlacement(const MemRefRegion ®ion, Block &block, } } -/// Generates DMAs for a contiguous sequence of instructions in `block` in the +/// Generates DMAs for a contiguous sequence of operations in `block` in the /// iterator range [begin, end). Returns the total size of the DMA buffers used. // Since we generate alloc's and dealloc's for all DMA buffers (before and -// after the range of instructions resp), all of the fast memory capacity is +// after the range of operations resp), all of the fast memory capacity is // assumed to be available. uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) { if (begin == end) @@ -610,8 +609,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) { // To check for errors when walking the block. bool error = false; - // Walk this range of instructions to gather all memory regions. - block->walk(begin, end, [&](Instruction *opInst) { + // Walk this range of operations to gather all memory regions. + block->walk(begin, end, [&](Operation *opInst) { // Gather regions to allocate to buffers in faster memory space. if (auto loadOp = opInst->dyn_cast()) { if (loadOp.getMemRefType().getMemorySpace() != slowMemorySpace) @@ -738,8 +737,7 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) { return totalDmaBuffersSizeInBytes; } - // For a range of operation instructions, a note will be emitted at the - // caller. + // For a range of operations, a note will be emitted at the caller. AffineForOp forOp; uint64_t sizeInKib = llvm::divideCeil(totalDmaBuffersSizeInBytes, 1024); if (llvm::DebugFlag && (forOp = begin->dyn_cast())) { @@ -750,8 +748,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) { if (totalDmaBuffersSizeInBytes > fastMemCapacityBytes) { StringRef str = "Total size of all DMA buffers' for this block " "exceeds fast memory capacity\n"; - if (auto *inst = block->getContainingOp()) - inst->emitError(str); + if (auto *op = block->getContainingOp()) + op->emitError(str); else block->getFunction()->emitError(str); } diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index 7a6f188e6af4..80308ea6a404 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -123,26 +123,26 @@ namespace { // operations, and whether or not an IfInst was encountered in the loop nest. 
struct LoopNestStateCollector { SmallVector forOps; - SmallVector loadOpInsts; - SmallVector storeOpInsts; + SmallVector loadOpInsts; + SmallVector storeOpInsts; bool hasNonForRegion = false; - void collect(Instruction *instToWalk) { - instToWalk->walk([&](Instruction *opInst) { - if (opInst->isa()) - forOps.push_back(opInst->cast()); - else if (opInst->getNumRegions() != 0) + void collect(Operation *opToWalk) { + opToWalk->walk([&](Operation *op) { + if (op->isa()) + forOps.push_back(op->cast()); + else if (op->getNumRegions() != 0) hasNonForRegion = true; - else if (opInst->isa()) - loadOpInsts.push_back(opInst); - else if (opInst->isa()) - storeOpInsts.push_back(opInst); + else if (op->isa()) + loadOpInsts.push_back(op); + else if (op->isa()) + storeOpInsts.push_back(op); }); } }; // TODO(b/117228571) Replace when this is modeled through side-effects/op traits -static bool isMemRefDereferencingOp(Instruction &op) { +static bool isMemRefDereferencingOp(Operation &op) { if (op.isa() || op.isa() || op.isa() || op.isa()) return true; @@ -150,7 +150,7 @@ static bool isMemRefDereferencingOp(Instruction &op) { } // MemRefDependenceGraph is a graph data structure where graph nodes are -// top-level instructions in a Function which contain load/store ops, and edges +// top-level operations in a Function which contain load/store ops, and edges // are memref dependences between the nodes. // TODO(andydavis) Add a more flexible dependece graph representation. // TODO(andydavis) Add a depth parameter to dependence graph construction. @@ -163,12 +163,12 @@ public: // The unique identifier of this node in the graph. unsigned id; // The top-level statment which is (or contains) loads/stores. - Instruction *inst; + Operation *op; // List of load operations. - SmallVector loads; + SmallVector loads; // List of store op insts. - SmallVector stores; - Node(unsigned id, Instruction *inst) : id(id), inst(inst) {} + SmallVector stores; + Node(unsigned id, Operation *op) : id(id), op(op) {} // Returns the load op count for 'memref'. unsigned getLoadOpCount(Value *memref) { @@ -192,7 +192,7 @@ public: // Returns all store ops in 'storeOps' which access 'memref'. void getStoreOpsForMemref(Value *memref, - SmallVectorImpl *storeOps) { + SmallVectorImpl *storeOps) { for (auto *storeOpInst : stores) { if (memref == storeOpInst->cast().getMemRef()) storeOps->push_back(storeOpInst); @@ -201,7 +201,7 @@ public: // Returns all load ops in 'loadOps' which access 'memref'. void getLoadOpsForMemref(Value *memref, - SmallVectorImpl *loadOps) { + SmallVectorImpl *loadOps) { for (auto *loadOpInst : loads) { if (memref == loadOpInst->cast().getMemRef()) loadOps->push_back(loadOpInst); @@ -236,7 +236,7 @@ public: // which contain accesses to the same memref 'value'. If the value is a // non-memref value, then the dependence is between a graph node which // defines an SSA value and another graph node which uses the SSA value - // (e.g. a constant instruction defining a value which is used inside a loop + // (e.g. a constant operation defining a value which is used inside a loop // nest). Value *value; }; @@ -266,9 +266,9 @@ public: return &it->second; } - // Adds a node with 'inst' to the graph and returns its unique identifier. - unsigned addNode(Instruction *inst) { - Node node(nextNodeId++, inst); + // Adds a node with 'op' to the graph and returns its unique identifier. 
+ unsigned addNode(Operation *op) { + Node node(nextNodeId++, op); nodes.insert({node.id, node}); return node.id; } @@ -301,9 +301,9 @@ public: Node *node = getNode(id); for (auto *storeOpInst : node->stores) { auto *memref = storeOpInst->cast().getMemRef(); - auto *inst = memref->getDefiningOp(); + auto *op = memref->getDefiningOp(); // Return true if 'memref' is a block argument. - if (!inst) + if (!op) return true; // Return true if any use of 'memref' escapes the function. for (auto &use : memref->getUses()) @@ -436,50 +436,50 @@ public: return outEdgeCount; } - // Computes and returns an insertion point instruction, before which the + // Computes and returns an insertion point operation, before which the // the fused loop nest can be inserted while preserving // dependences. Returns nullptr if no such insertion point is found. - Instruction *getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) { + Operation *getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) { if (outEdges.count(srcId) == 0) - return getNode(dstId)->inst; + return getNode(dstId)->op; // Build set of insts in range (srcId, dstId) which depend on 'srcId'. - SmallPtrSet srcDepInsts; + SmallPtrSet srcDepInsts; for (auto &outEdge : outEdges[srcId]) if (outEdge.id != dstId) - srcDepInsts.insert(getNode(outEdge.id)->inst); + srcDepInsts.insert(getNode(outEdge.id)->op); // Build set of insts in range (srcId, dstId) on which 'dstId' depends. - SmallPtrSet dstDepInsts; + SmallPtrSet dstDepInsts; for (auto &inEdge : inEdges[dstId]) if (inEdge.id != srcId) - dstDepInsts.insert(getNode(inEdge.id)->inst); + dstDepInsts.insert(getNode(inEdge.id)->op); - Instruction *srcNodeInst = getNode(srcId)->inst; - Instruction *dstNodeInst = getNode(dstId)->inst; + Operation *srcNodeInst = getNode(srcId)->op; + Operation *dstNodeInst = getNode(dstId)->op; // Computing insertion point: - // *) Walk all instruction positions in Block instruction list in the - // range (src, dst). For each instruction 'inst' visited in this search: - // *) Store in 'firstSrcDepPos' the first position where 'inst' has a + // *) Walk all operation positions in Block operation list in the + // range (src, dst). For each operation 'op' visited in this search: + // *) Store in 'firstSrcDepPos' the first position where 'op' has a // dependence edge from 'srcNode'. - // *) Store in 'lastDstDepPost' the last position where 'inst' has a + // *) Store in 'lastDstDepPost' the last position where 'op' has a // dependence edge to 'dstNode'. // *) Compare 'firstSrcDepPos' and 'lastDstDepPost' to determine the - // instruction insertion point (or return null pointer if no such + // operation insertion point (or return null pointer if no such // insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos'). - SmallVector depInsts; + SmallVector depInsts; Optional firstSrcDepPos; Optional lastDstDepPos; unsigned pos = 0; for (Block::iterator it = std::next(Block::iterator(srcNodeInst)); it != Block::iterator(dstNodeInst); ++it) { - Instruction *inst = &(*it); - if (srcDepInsts.count(inst) > 0 && firstSrcDepPos == None) + Operation *op = &(*it); + if (srcDepInsts.count(op) > 0 && firstSrcDepPos == None) firstSrcDepPos = pos; - if (dstDepInsts.count(inst) > 0) + if (dstDepInsts.count(op) > 0) lastDstDepPos = pos; - depInsts.push_back(inst); + depInsts.push_back(op); ++pos; } @@ -557,8 +557,8 @@ public: } // Adds ops in 'loads' and 'stores' to node at 'id'. 
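// A minimal standalone C++ sketch of the insertion-point rule documented in
// getFusedLoopNestInsertionPoint above, with the operations in the range
// (srcNodeInst, dstNodeInst) reduced to positions in a flat list; the helper
// name and the boolean-vector encoding are hypothetical, not MLIR API.
#include <cstddef>
#include <optional>
#include <vector>

// Returns the position before which the fused loop nest may be inserted, or
// std::nullopt when firstSrcDepPos <= lastDstDepPos (no legal point exists).
std::optional<size_t>
computeInsertionPos(const std::vector<bool> &dependsOnSrc,   // has a dependence edge from srcNode
                    const std::vector<bool> &dstDependsOn) { // has a dependence edge to dstNode
  std::optional<size_t> firstSrcDepPos, lastDstDepPos;
  for (size_t pos = 0, e = dependsOnSrc.size(); pos != e; ++pos) {
    if (dependsOnSrc[pos] && !firstSrcDepPos)
      firstSrcDepPos = pos;
    if (dstDependsOn[pos])
      lastDstDepPos = pos;
  }
  if (firstSrcDepPos && lastDstDepPos && *firstSrcDepPos <= *lastDstDepPos)
    return std::nullopt;      // dependences straddle every candidate slot
  if (firstSrcDepPos)
    return *firstSrcDepPos;   // insert just before the first src-dependent op
  return dependsOnSrc.size(); // unconstrained: insert right before dstNode
}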
- void addToNode(unsigned id, const SmallVectorImpl &loads, - const SmallVectorImpl &stores) { + void addToNode(unsigned id, const SmallVectorImpl &loads, + const SmallVectorImpl &stores) { Node *node = getNode(id); for (auto *loadOpInst : loads) node->loads.push_back(loadOpInst); @@ -596,7 +596,7 @@ public: continue; assert(nodes.count(edge.id) > 0); // Skip if 'edge.id' is not a loop nest. - if (!getNode(edge.id)->inst->isa()) + if (!getNode(edge.id)->op->isa()) continue; // Visit current input edge 'edge'. callback(edge); @@ -623,7 +623,7 @@ public: void dump() const { print(llvm::errs()); } }; -// Intializes the data dependence graph by walking instructions in 'f'. +// Intializes the data dependence graph by walking operations in 'f'. // Assigns each node in the graph a node id based on program order in 'f'. // TODO(andydavis) Add support for taking a Block arg to construct the // dependence graph at a different depth. @@ -634,18 +634,18 @@ bool MemRefDependenceGraph::init(Function &f) { if (f.getBlocks().size() != 1) return false; - DenseMap forToNodeMap; - for (auto &inst : f.front()) { - if (auto forOp = inst.dyn_cast()) { + DenseMap forToNodeMap; + for (auto &op : f.front()) { + if (auto forOp = op.dyn_cast()) { // Create graph node 'id' to represent top-level 'forOp' and record // all loads and store accesses it contains. LoopNestStateCollector collector; - collector.collect(&inst); + collector.collect(&op); // Return false if a non 'affine.for' region was found (not currently // supported). if (collector.hasNonForRegion) return false; - Node node(nextNodeId++, &inst); + Node node(nextNodeId++, &op); for (auto *opInst : collector.loadOpInsts) { node.loads.push_back(opInst); auto *memref = opInst->cast().getMemRef(); @@ -656,29 +656,29 @@ bool MemRefDependenceGraph::init(Function &f) { auto *memref = opInst->cast().getMemRef(); memrefAccesses[memref].insert(node.id); } - forToNodeMap[&inst] = node.id; + forToNodeMap[&op] = node.id; nodes.insert({node.id, node}); - } else if (auto loadOp = inst.dyn_cast()) { + } else if (auto loadOp = op.dyn_cast()) { // Create graph node for top-level load op. - Node node(nextNodeId++, &inst); - node.loads.push_back(&inst); - auto *memref = inst.cast().getMemRef(); + Node node(nextNodeId++, &op); + node.loads.push_back(&op); + auto *memref = op.cast().getMemRef(); memrefAccesses[memref].insert(node.id); nodes.insert({node.id, node}); - } else if (auto storeOp = inst.dyn_cast()) { + } else if (auto storeOp = op.dyn_cast()) { // Create graph node for top-level store op. - Node node(nextNodeId++, &inst); - node.stores.push_back(&inst); - auto *memref = inst.cast().getMemRef(); + Node node(nextNodeId++, &op); + node.stores.push_back(&op); + auto *memref = op.cast().getMemRef(); memrefAccesses[memref].insert(node.id); nodes.insert({node.id, node}); - } else if (inst.getNumRegions() != 0) { + } else if (op.getNumRegions() != 0) { // Return false if another region is found (not currently supported). return false; - } else if (inst.getNumResults() > 0 && !inst.use_empty()) { + } else if (op.getNumResults() > 0 && !op.use_empty()) { // Create graph node for top-level producer of SSA values, which // could be used by loop nest nodes. 
- Node node(nextNodeId++, &inst); + Node node(nextNodeId++, &op); nodes.insert({node.id, node}); } } @@ -689,7 +689,7 @@ bool MemRefDependenceGraph::init(Function &f) { const Node &node = idAndNode.second; if (!node.loads.empty() || !node.stores.empty()) continue; - auto *opInst = node.inst; + auto *opInst = node.op; for (auto *value : opInst->getResults()) { for (auto &use : value->getUses()) { SmallVector loops; @@ -728,11 +728,11 @@ namespace { // and operation count) for a loop nest up until the innermost loop body. struct LoopNestStats { // Map from AffineForOp to immediate child AffineForOps in its loop body. - DenseMap> loopMap; + DenseMap> loopMap; // Map from AffineForOp to count of operations in its loop body. - DenseMap opCountMap; + DenseMap opCountMap; // Map from AffineForOp to its constant trip count. - DenseMap tripCountMap; + DenseMap tripCountMap; }; // LoopNestStatsCollector walks a single loop nest and gathers per-loop @@ -743,8 +743,8 @@ struct LoopNestStatsCollector { LoopNestStatsCollector(LoopNestStats *stats) : stats(stats) {} - void collect(Instruction *inst) { - inst->walk([&](AffineForOp forOp) { + void collect(Operation *op) { + op->walk([&](AffineForOp forOp) { auto *forInst = forOp.getOperation(); auto *parentInst = forOp.getOperation()->getParentOp(); if (parentInst != nullptr) { @@ -753,11 +753,11 @@ struct LoopNestStatsCollector { stats->loopMap[parentInst].push_back(forOp); } - // Record the number of op instructions in the body of 'forOp'. + // Record the number of op operations in the body of 'forOp'. unsigned count = 0; stats->opCountMap[forInst] = 0; - for (auto &inst : *forOp.getBody()) { - if (!inst.isa() && !inst.isa()) + for (auto &op : *forOp.getBody()) { + if (!op.isa() && !op.isa()) ++count; } stats->opCountMap[forInst] = count; @@ -789,9 +789,9 @@ struct LoopNestStatsCollector { // NOTE: this is used to compute the cost of fusing a slice of some loop nest // within another loop. static int64_t getComputeCost( - Instruction *forInst, LoopNestStats *stats, - llvm::SmallDenseMap *tripCountOverrideMap, - DenseMap *computeCostMap) { + Operation *forInst, LoopNestStats *stats, + llvm::SmallDenseMap *tripCountOverrideMap, + DenseMap *computeCostMap) { // 'opCount' is the total number operations in one iteration of 'forOp' body int64_t opCount = stats->opCountMap[forInst]; if (stats->loopMap.count(forInst) > 0) { @@ -843,8 +843,8 @@ static Optional getConstDifference(AffineMap lbMap, AffineMap ubMap) { // was encountered). // TODO(andydavis) Make this work with non-unit step loops. static bool buildSliceTripCountMap( - Instruction *srcOpInst, ComputationSliceState *sliceState, - llvm::SmallDenseMap *tripCountMap) { + Operation *srcOpInst, ComputationSliceState *sliceState, + llvm::SmallDenseMap *tripCountMap) { SmallVector srcLoopIVs; getLoopIVs(*srcOpInst, &srcLoopIVs); unsigned numSrcLoopIVs = srcLoopIVs.size(); @@ -873,12 +873,11 @@ static bool buildSliceTripCountMap( // Removes load operations from 'srcLoads' which operate on 'memref', and // adds them to 'dstLoads'. 
-static void -moveLoadsAccessingMemrefTo(Value *memref, - SmallVectorImpl *srcLoads, - SmallVectorImpl *dstLoads) { +static void moveLoadsAccessingMemrefTo(Value *memref, + SmallVectorImpl *srcLoads, + SmallVectorImpl *dstLoads) { dstLoads->clear(); - SmallVector srcLoadsToKeep; + SmallVector srcLoadsToKeep; for (auto *load : *srcLoads) { if (load->cast().getMemRef() == memref) dstLoads->push_back(load); @@ -889,7 +888,7 @@ moveLoadsAccessingMemrefTo(Value *memref, } // Returns the innermost common loop depth for the set of operations in 'ops'. -static unsigned getInnermostCommonLoopDepth(ArrayRef ops) { +static unsigned getInnermostCommonLoopDepth(ArrayRef ops) { unsigned numOps = ops.size(); assert(numOps > 0); @@ -917,10 +916,10 @@ static unsigned getInnermostCommonLoopDepth(ArrayRef ops) { // Returns the maximum loop depth at which no dependences between 'loadOpInsts' // and 'storeOpInsts' are satisfied. -static unsigned getMaxLoopDepth(ArrayRef loadOpInsts, - ArrayRef storeOpInsts) { +static unsigned getMaxLoopDepth(ArrayRef loadOpInsts, + ArrayRef storeOpInsts) { // Merge loads and stores into the same array. - SmallVector ops(loadOpInsts.begin(), loadOpInsts.end()); + SmallVector ops(loadOpInsts.begin(), loadOpInsts.end()); ops.append(storeOpInsts.begin(), storeOpInsts.end()); // Compute the innermost common loop depth for loads and stores. @@ -970,7 +969,7 @@ static unsigned getMaxLoopDepth(ArrayRef loadOpInsts, // dependence componenent lexicographically negative. // TODO(andydavis) Move this function to LoopUtils. static bool -computeLoopInterchangePermutation(ArrayRef ops, +computeLoopInterchangePermutation(ArrayRef ops, unsigned maxLoopDepth, SmallVectorImpl *loopPermMap) { // Gather dependence components for dependences between all ops in 'ops' @@ -1054,12 +1053,12 @@ computeLoopInterchangePermutation(ArrayRef ops, // This can increase the loop depth at which we can fuse a slice, since we are // pushing loop carried dependence to a greater depth in the loop nest. static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) { - assert(node->inst->isa()); + assert(node->op->isa()); // Get perfectly nested sequence of loops starting at root of loop nest // (the first op being another AffineFor, and the second op - a terminator). // TODO(andydavis,bondhugula) Share this with similar code in loop tiling. SmallVector loops; - AffineForOp curr = node->inst->cast(); + AffineForOp curr = node->op->cast(); loops.push_back(curr); auto *currBody = curr.getBody(); while (currBody->begin() == std::prev(currBody->end(), 2) && @@ -1071,7 +1070,7 @@ static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) { return; // Merge loads and stores into the same array. - SmallVector memOps(node->loads.begin(), node->loads.end()); + SmallVector memOps(node->loads.begin(), node->loads.end()); memOps.append(node->stores.begin(), node->stores.end()); // Compute loop permutation in 'loopPermMap'. @@ -1091,7 +1090,7 @@ static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) { } } assert(loopNestRootIndex != -1 && "invalid root index"); - node->inst = loops[loopNestRootIndex].getOperation(); + node->op = loops[loopNestRootIndex].getOperation(); } // TODO(mlir-team): improve/complete this when we have target data. @@ -1114,8 +1113,7 @@ unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { // MemRefRegion written to by 'srcStoreOpInst' at depth 'dstLoopDepth'. // TODO(bondhugula): consider refactoring the common code from generateDma and // this one. 
-static Value *createPrivateMemRef(AffineForOp forOp, - Instruction *srcStoreOpInst, +static Value *createPrivateMemRef(AffineForOp forOp, Operation *srcStoreOpInst, unsigned dstLoopDepth, Optional fastMemorySpace, uint64_t localBufSizeThreshold) { @@ -1228,7 +1226,7 @@ static Value *createPrivateMemRef(AffineForOp forOp, // Does the slice have a single iteration? static uint64_t getSliceIterationCount( - const llvm::SmallDenseMap &sliceTripCountMap) { + const llvm::SmallDenseMap &sliceTripCountMap) { uint64_t iterCount = 1; for (const auto &count : sliceTripCountMap) { iterCount *= count.second; @@ -1275,7 +1273,7 @@ static bool canFuseSrcWhichWritesToLiveOut(unsigned srcId, unsigned dstId, return false; // Compute MemRefRegion 'dstWriteRegion' for 'dstStoreOpInst' on 'memref'. - SmallVector dstStoreOps; + SmallVector dstStoreOps; dstNode->getStoreOpsForMemref(memref, &dstStoreOps); assert(dstStoreOps.size() == 1); auto *dstStoreOpInst = dstStoreOps[0]; @@ -1305,8 +1303,8 @@ static bool canFuseSrcWhichWritesToLiveOut(unsigned srcId, unsigned dstId, // and each load op in 'dstLoadOpInsts' at 'dstLoopDepth', and returns // the union in 'sliceState'. Returns true on success, false otherwise. // TODO(andydavis) Move this to a loop fusion utility function. -static bool getSliceUnion(Instruction *srcOpInst, - ArrayRef dstLoadOpInsts, +static bool getSliceUnion(Operation *srcOpInst, + ArrayRef dstLoadOpInsts, unsigned numSrcLoopIVs, unsigned dstLoopDepth, ComputationSliceState *sliceState) { MemRefAccess srcAccess(srcOpInst); @@ -1415,10 +1413,9 @@ static bool getSliceUnion(Instruction *srcOpInst, // *) Compares the total cost of the unfused loop nests to the min cost fused // loop nest computed in the previous step, and returns true if the latter // is lower. -static bool isFusionProfitable(Instruction *srcOpInst, - Instruction *srcStoreOpInst, - ArrayRef dstLoadOpInsts, - ArrayRef dstStoreOpInsts, +static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst, + ArrayRef dstLoadOpInsts, + ArrayRef dstStoreOpInsts, ComputationSliceState *sliceState, unsigned *dstLoopDepth, bool maximalFusion) { LLVM_DEBUG({ @@ -1492,7 +1489,7 @@ static bool isFusionProfitable(Instruction *srcOpInst, MemRefRegion srcWriteRegion(srcStoreOpInst->getLoc()); if (failed(srcWriteRegion.compute(srcStoreOpInst, /*loopDepth=*/0))) { LLVM_DEBUG(llvm::dbgs() - << "Unable to compute MemRefRegion for source instruction\n."); + << "Unable to compute MemRefRegion for source operation\n."); return false; } @@ -1510,8 +1507,8 @@ static bool isFusionProfitable(Instruction *srcOpInst, // Evaluate all depth choices for materializing the slice in the destination // loop nest. - llvm::SmallDenseMap sliceTripCountMap; - DenseMap computeCostMap; + llvm::SmallDenseMap sliceTripCountMap; + DenseMap computeCostMap; for (unsigned i = maxDstLoopDepth; i >= 1; --i) { // Compute the union of slice bounds of all ops in 'dstLoadOpInsts'. if (!getSliceUnion(srcOpInst, dstLoadOpInsts, numSrcLoopIVs, i, @@ -1754,7 +1751,7 @@ static bool isFusionProfitable(Instruction *srcOpInst, // bounds to be functions of 'dstLoopNest' IVs and symbols. // *) Fuse the 'srcLoopNest' computation slice into the 'dstLoopNest', // at a loop depth determined by the cost model in 'isFusionProfitable'. -// *) Add the newly fused load/store operation instructions to the state, +// *) Add the newly fused load/store operations to the state, // and also add newly fuse load ops to 'dstLoopOps' to be considered // as fusion dst load ops in another iteration. 
// *) Remove old src loop nest and its associated state. @@ -1773,7 +1770,7 @@ static bool isFusionProfitable(Instruction *srcOpInst, // is preserved in the fused loop nest. // *) Update graph state to reflect the fusion of 'sibNode' into 'dstNode'. // -// Given a graph where top-level instructions are vertices in the set 'V' and +// Given a graph where top-level operations are vertices in the set 'V' and // edges in the set 'E' are dependences between vertices, this algorithm // takes O(V) time for initialization, and has runtime O(V + E). // @@ -1844,7 +1841,7 @@ public: // Get 'dstNode' into which to attempt fusion. auto *dstNode = mdg->getNode(dstId); // Skip if 'dstNode' is not a loop nest. - if (!dstNode->inst->isa()) + if (!dstNode->op->isa()) continue; // Sink sequential loops in 'dstNode' (and thus raise parallel loops) // while preserving relative order. This can increase the maximum loop @@ -1852,8 +1849,8 @@ public: // consumer loop nest. sinkSequentialLoops(dstNode); - SmallVector loads = dstNode->loads; - SmallVector dstLoadOpInsts; + SmallVector loads = dstNode->loads; + SmallVector dstLoadOpInsts; DenseSet visitedMemrefs; while (!loads.empty()) { // Get memref of load on top of the stack. @@ -1882,7 +1879,7 @@ public: // Get 'srcNode' from which to attempt fusion into 'dstNode'. auto *srcNode = mdg->getNode(srcId); // Skip if 'srcNode' is not a loop nest. - if (!srcNode->inst->isa()) + if (!srcNode->op->isa()) continue; // Skip if 'srcNode' has more than one store to any memref. // TODO(andydavis) Support fusing multi-output src loop nests. @@ -1908,9 +1905,9 @@ public: if (mdg->getOutEdgeCount(srcNode->id, memref) > maxSrcUserCount) continue; - // Compute an instruction list insertion point for the fused loop + // Compute an operation list insertion point for the fused loop // nest which preserves dependences. - Instruction *insertPointInst = + Operation *insertPointInst = mdg->getFusedLoopNestInsertionPoint(srcNode->id, dstNode->id); if (insertPointInst == nullptr) continue; @@ -1918,7 +1915,7 @@ public: // Get unique 'srcNode' store op. auto *srcStoreOpInst = srcNode->stores.front(); // Gather 'dstNode' store ops to 'memref'. - SmallVector dstStoreOpInsts; + SmallVector dstStoreOpInsts; for (auto *storeOpInst : dstNode->stores) if (storeOpInst->cast().getMemRef() == memref) dstStoreOpInsts.push_back(storeOpInst); @@ -1938,7 +1935,7 @@ public: LLVM_DEBUG(llvm::dbgs() << "\tslice loop nest:\n" << *sliceLoopNest.getOperation() << "\n"); // Move 'dstAffineForOp' before 'insertPointInst' if needed. - auto dstAffineForOp = dstNode->inst->cast(); + auto dstAffineForOp = dstNode->op->cast(); if (insertPointInst != dstAffineForOp.getOperation()) { dstAffineForOp.getOperation()->moveBefore(insertPointInst); } @@ -1954,7 +1951,7 @@ public: } if (!writesToLiveInOrOut) { // Create private memref for 'memref' in 'dstAffineForOp'. - SmallVector storesForMemref; + SmallVector storesForMemref; for (auto *storeOpInst : sliceCollector.storeOpInsts) { if (storeOpInst->cast().getMemRef() == memref) storesForMemref.push_back(storeOpInst); @@ -1995,7 +1992,7 @@ public: // so it is safe to remove. if (writesToLiveInOrOut || mdg->canRemoveNode(srcNode->id)) { mdg->removeNode(srcNode->id); - srcNode->inst->erase(); + srcNode->op->erase(); } else { // Add remaining users of 'oldMemRef' back on the worklist (if not // already there), as its replacement with a local/private memref @@ -2034,7 +2031,7 @@ public: // Get 'dstNode' into which to attempt fusion. 
auto *dstNode = mdg->getNode(dstId); // Skip if 'dstNode' is not a loop nest. - if (!dstNode->inst->isa()) + if (!dstNode->op->isa()) continue; // Attempt to fuse 'dstNode' with its sibling nodes in the graph. fuseWithSiblingNodes(dstNode); @@ -2051,11 +2048,11 @@ public: // TODO(andydavis) Check that 'sibStoreOpInst' post-dominates all other // stores to the same memref in 'sibNode' loop nest. auto *sibNode = mdg->getNode(sibId); - // Compute an instruction list insertion point for the fused loop + // Compute an operation list insertion point for the fused loop // nest which preserves dependences. - assert(sibNode->inst->getBlock() == dstNode->inst->getBlock()); - Instruction *insertPointInst = - sibNode->inst->isBeforeInBlock(dstNode->inst) + assert(sibNode->op->getBlock() == dstNode->op->getBlock()); + Operation *insertPointInst = + sibNode->op->isBeforeInBlock(dstNode->op) ? mdg->getFusedLoopNestInsertionPoint(sibNode->id, dstNode->id) : mdg->getFusedLoopNestInsertionPoint(dstNode->id, sibNode->id); if (insertPointInst == nullptr) @@ -2064,21 +2061,21 @@ public: // Check if fusion would be profitable and at what depth. // Get unique 'sibNode' load op to 'memref'. - SmallVector sibLoadOpInsts; + SmallVector sibLoadOpInsts; sibNode->getLoadOpsForMemref(memref, &sibLoadOpInsts); // Currently findSiblingNodeToFuse searches for siblings with one load. assert(sibLoadOpInsts.size() == 1); - Instruction *sibLoadOpInst = sibLoadOpInsts[0]; + Operation *sibLoadOpInst = sibLoadOpInsts[0]; assert(!sibNode->stores.empty()); // TODO(andydavis) Choose the store which postdominates all other stores. auto *sibStoreOpInst = sibNode->stores.back(); // Gather 'dstNode' load ops to 'memref'. - SmallVector dstLoadOpInsts; + SmallVector dstLoadOpInsts; dstNode->getLoadOpsForMemref(memref, &dstLoadOpInsts); // Gather 'dstNode' store ops to 'memref'. - SmallVector dstStoreOpInsts; + SmallVector dstStoreOpInsts; dstNode->getStoreOpsForMemref(memref, &dstStoreOpInsts); unsigned bestDstLoopDepth; @@ -2094,8 +2091,8 @@ public: auto sliceLoopNest = mlir::insertBackwardComputationSlice( sibLoadOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState); if (sliceLoopNest != nullptr) { - auto dstForInst = dstNode->inst->cast(); - // Update instruction position of fused loop nest (if needed). + auto dstForInst = dstNode->op->cast(); + // Update operation position of fused loop nest (if needed). if (insertPointInst != dstForInst.getOperation()) { dstForInst.getOperation()->moveBefore(insertPointInst); } @@ -2140,7 +2137,7 @@ public: if (outEdge.id == dstNode->id || outEdge.value != inEdge.value) return; auto *sibNode = mdg->getNode(sibNodeId); - if (!sibNode->inst->isa()) + if (!sibNode->op->isa()) return; // Skip if 'outEdge' is not a read-after-write dependence. // TODO(andydavis) Remove restrict to single load op restriction. @@ -2196,7 +2193,7 @@ public: } // Collect dst loop stats after memref privatizaton transformation. - auto dstForInst = dstNode->inst->cast(); + auto dstForInst = dstNode->op->cast(); LoopNestStateCollector dstLoopCollector; dstLoopCollector.collect(dstForInst.getOperation()); // Clear and add back loads and stores @@ -2208,7 +2205,7 @@ public: // function. if (mdg->getOutEdgeCount(sibNode->id) == 0) { mdg->removeNode(sibNode->id); - sibNode->inst->cast().erase(); + sibNode->op->cast().erase(); } } @@ -2218,13 +2215,13 @@ public: if (pair.second > 0) continue; auto *memref = pair.first; - // Skip if there exist other uses (return instruction or function calls). 
+ // Skip if there exist other uses (return operation or function calls). if (!memref->use_empty()) continue; // Use list expected to match the dep graph info. - auto *inst = memref->getDefiningOp(); - if (inst && inst->isa()) - inst->erase(); + auto *op = memref->getDefiningOp(); + if (op && op->isa()) + op->erase(); } } }; diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp index f99b602cf0b2..f7fef1a428cd 100644 --- a/mlir/lib/Transforms/LoopTiling.cpp +++ b/mlir/lib/Transforms/LoopTiling.cpp @@ -180,7 +180,7 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef band, assert(!band.empty()); assert(band.size() == tileSizes.size() && "Incorrect number of tile sizes"); - // Check if the supplied for inst's are all successively nested. + // Check if the supplied for op's are all successively nested. for (unsigned i = 1, e = band.size(); i < e; i++) { assert(band[i].getOperation()->getParentOp() == band[i - 1].getOperation()); } @@ -269,8 +269,8 @@ static void getTileableBands(Function &f, }; for (auto &block : f) - for (auto &inst : block) - if (auto forOp = inst.dyn_cast()) + for (auto &op : block) + if (auto forOp = op.dyn_cast()) getMaximalPerfectLoopNest(forOp); } diff --git a/mlir/lib/Transforms/LoopUnroll.cpp b/mlir/lib/Transforms/LoopUnroll.cpp index 5687c6126d1b..3b79d6245beb 100644 --- a/mlir/lib/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Transforms/LoopUnroll.cpp @@ -79,7 +79,7 @@ struct LoopUnroll : public FunctionPass { void runOnFunction() override; - /// Unroll this for inst. Returns failure if nothing was done. + /// Unroll this for op. Returns failure if nothing was done. LogicalResult runOnAffineForOp(AffineForOp forOp); static const unsigned kDefaultUnrollFactor = 4; @@ -106,7 +106,7 @@ void LoopUnroll::runOnFunction() { hasInnerLoops |= walkPostOrder(&(*Start++)); return hasInnerLoops; } - bool walkPostOrder(Instruction *opInst) { + bool walkPostOrder(Operation *opInst) { bool hasInnerLoops = false; for (auto ®ion : opInst->getRegions()) for (auto &block : region) @@ -158,7 +158,7 @@ void LoopUnroll::runOnFunction() { } } -/// Unrolls a 'affine.for' inst. Returns success if the loop was unrolled, +/// Unrolls a 'affine.for' op. Returns success if the loop was unrolled, /// failure otherwise. The default unroll factor is 4. LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { // Use the function callback if one was provided. diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp index 3ea20c0c2829..a3a24f6c0f78 100644 --- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp +++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp @@ -17,7 +17,7 @@ // // This file implements loop unroll and jam. Unroll and jam is a transformation // that improves locality, in particular, register reuse, while also improving -// instruction level parallelism. The example below shows what it does in nearly +// operation level parallelism. The example below shows what it does in nearly // the general case. Loop unroll and jam currently works if the bounds of the // loops inner to the loop being unroll-jammed do not depend on the latter. // @@ -39,7 +39,7 @@ // S6(i+1); // // Note: 'if/else' blocks are not jammed. So, if there are loops inside if -// inst's, bodies of those loops will not be jammed. +// op's, bodies of those loops will not be jammed. 
//===----------------------------------------------------------------------===// #include "mlir/Transforms/Passes.h" @@ -96,7 +96,7 @@ void LoopUnrollAndJam::runOnFunction() { runOnAffineForOp(forOp); } -/// Unroll and jam a 'affine.for' inst. Default unroll jam factor is +/// Unroll and jam a 'affine.for' op. Default unroll jam factor is /// kDefaultUnrollJamFactor. Return failure if nothing was done. LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) { // Unroll and jam by the factor that was passed if any. @@ -123,16 +123,16 @@ LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp, /// Unrolls and jams this loop by the specified factor. LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp, uint64_t unrollJamFactor) { - // Gathers all maximal sub-blocks of instructions that do not themselves - // include a for inst (a instruction could have a descendant for inst though + // Gathers all maximal sub-blocks of operations that do not themselves + // include a for op (a operation could have a descendant for op though // in its tree). Ignore the block terminators. struct JamBlockGatherer { - // Store iterators to the first and last inst of each sub-block found. + // Store iterators to the first and last op of each sub-block found. std::vector> subBlocks; // This is a linear time walk. - void walk(Instruction *inst) { - for (auto ®ion : inst->getRegions()) + void walk(Operation *op) { + for (auto ®ion : op->getRegions()) for (auto &block : region) walk(block); } diff --git a/mlir/lib/Transforms/LowerAffine.cpp b/mlir/lib/Transforms/LowerAffine.cpp index acc9481e89cd..3676c2faae95 100644 --- a/mlir/lib/Transforms/LowerAffine.cpp +++ b/mlir/lib/Transforms/LowerAffine.cpp @@ -32,7 +32,7 @@ using namespace mlir; namespace { -// Visit affine expressions recursively and build the sequence of instructions +// Visit affine expressions recursively and build the sequence of operations // that correspond to it. Visitation functions return an Value of the // expression subtree they visited or `nullptr` on error. class AffineApplyExpander @@ -102,7 +102,7 @@ public: // Floor division operation (rounds towards negative infinity). // // For positive divisors, it can be implemented without branching and with a - // single division instruction as + // single division operation as // // a floordiv b = // let negative = a < 0 in @@ -144,7 +144,7 @@ public: // Ceiling division operation (rounds towards positive infinity). // // For positive divisors, it can be implemented without branching and with a - // single division instruction as + // single division operation as // // a ceildiv b = // let negative = a <= 0 in @@ -213,7 +213,7 @@ private: }; } // namespace -// Create a sequence of instructions that implement the `expr` applied to the +// Create a sequence of operations that implement the `expr` applied to the // given dimension and symbol values. static mlir::Value *expandAffineExpr(FuncBuilder *builder, Location loc, AffineExpr expr, @@ -222,7 +222,7 @@ static mlir::Value *expandAffineExpr(FuncBuilder *builder, Location loc, return AffineApplyExpander(builder, dimValues, symbolValues, loc).visit(expr); } -// Create a sequence of instructions that implement the `affineMap` applied to +// Create a sequence of operations that implement the `affineMap` applied to // the given `operands` (as it it were an AffineApplyOp). 
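// A minimal standalone C++ sketch of the branch-free floordiv/ceildiv rewrites
// quoted in the AffineApplyExpander comments above; the helper names are
// hypothetical, and both assume a positive divisor `b`, as those comments state.
#include <cstdint>

int64_t floorDivSketch(int64_t a, int64_t b) {
  bool negative = a < 0;                    // the select condition
  int64_t absolute = negative ? -a - 1 : a; // remap into the non-negative range
  int64_t quotient = absolute / b;          // the single division
  return negative ? -quotient - 1 : quotient;
}

int64_t ceilDivSketch(int64_t a, int64_t b) {
  bool negative = a <= 0;
  int64_t absolute = negative ? -a : a - 1;
  int64_t quotient = absolute / b;
  return negative ? -quotient : quotient + 1;
}

// For example, floorDivSketch(-4, 3) == -2 and ceilDivSketch(-4, 3) == -1,
// whereas plain C++ division truncates -4 / 3 to -1.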
Optional> static expandAffineMap( FuncBuilder *builder, Location loc, AffineMap affineMap, @@ -395,16 +395,16 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) { return false; } -// Convert an "if" instruction into a flow of basic blocks. +// Convert an "if" operation into a flow of basic blocks. // -// Create an SESE region for the if instruction (including its "then" and -// optional "else" instruction blocks) and append it to the end of the current +// Create an SESE region for the if operation (including its "then" and +// optional "else" operation blocks) and append it to the end of the current // region. The conditional region consists of a sequence of condition-checking // blocks that implement the short-circuit scheme, followed by a "then" SESE // region and an "else" SESE region, and the continuation block that -// post-dominates all blocks of the "if" instruction. The flow of blocks that +// post-dominates all blocks of the "if" operation. The flow of blocks that // correspond to the "then" and "else" clauses are constructed recursively, -// enabling easy nesting of "if" instructions and if-then-else-if chains. +// enabling easy nesting of "if" operations and if-then-else-if chains. // // +--------------------------------+ // | | @@ -465,12 +465,12 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) { auto *continueBlock = condBlock->splitBlock(ifInst); // Create a block for the 'then' code, inserting it between the cond and - // continue blocks. Move the instructions over from the AffineIfOp and add a + // continue blocks. Move the operations over from the AffineIfOp and add a // branch to the continuation point. Block *thenBlock = new Block(); thenBlock->insertBefore(continueBlock); - // If the 'then' block is not empty, then splice the instructions except for + // If the 'then' block is not empty, then splice the operations except for // the terminator. auto &oldThenBlocks = ifOp.getThenBlocks(); if (!oldThenBlocks.empty()) { @@ -570,7 +570,7 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) { } // Convert an "affine.apply" operation into a sequence of arithmetic -// instructions using the StandardOps dialect. Return true on error. +// operations using the StandardOps dialect. Return true on error. bool LowerAffinePass::lowerAffineApply(AffineApplyOp op) { FuncBuilder builder(op.getOperation()); auto maybeExpandedMap = @@ -590,12 +590,12 @@ bool LowerAffinePass::lowerAffineApply(AffineApplyOp op) { // Entry point of the function convertor. // -// Conversion is performed by recursively visiting instructions of a Function. +// Conversion is performed by recursively visiting operations of a Function. // It reasons in terms of single-entry single-exit (SESE) regions that are not // materialized in the code. Instead, the pointer to the last block of the // region is maintained throughout the conversion as the insertion point of the // IR builder since we never change the first block after its creation. "Block" -// instructions such as loops and branches create new SESE regions for their +// operations such as loops and branches create new SESE regions for their // bodies, and surround them with additional basic blocks for the control flow. // Individual operations are simply appended to the end of the last basic block // of the current region. The SESE invariant allows us to easily handle nested @@ -607,32 +607,32 @@ bool LowerAffinePass::lowerAffineApply(AffineApplyOp op) { // corresponding Value that has been defined previously. 
The value flow // starts with function arguments converted to basic block arguments. void LowerAffinePass::runOnFunction() { - SmallVector instsToRewrite; + SmallVector instsToRewrite; - // Collect all the For instructions as well as AffineIfOps and AffineApplyOps. + // Collect all the For operations as well as AffineIfOps and AffineApplyOps. // We do this as a prepass to avoid invalidating the walker with our rewrite. - getFunction().walk([&](Instruction *inst) { - if (inst->isa() || inst->isa() || - inst->isa()) - instsToRewrite.push_back(inst); + getFunction().walk([&](Operation *op) { + if (op->isa() || op->isa() || + op->isa()) + instsToRewrite.push_back(op); }); - // Rewrite all of the ifs and fors. We walked the instructions in preorder, + // Rewrite all of the ifs and fors. We walked the operations in preorder, // so we know that we will rewrite them in the same order. - for (auto *inst : instsToRewrite) { - if (auto ifOp = inst->dyn_cast()) { + for (auto *op : instsToRewrite) { + if (auto ifOp = op->dyn_cast()) { if (lowerAffineIf(ifOp)) return signalPassFailure(); - } else if (auto forOp = inst->dyn_cast()) { + } else if (auto forOp = op->dyn_cast()) { if (lowerAffineFor(forOp)) return signalPassFailure(); - } else if (lowerAffineApply(inst->cast())) { + } else if (lowerAffineApply(op->cast())) { return signalPassFailure(); } } } -/// Lowers If and For instructions within a function into their lower level CFG +/// Lowers If and For operations within a function into their lower level CFG /// equivalent blocks. FunctionPassBase *mlir::createLowerAffinePass() { return new LowerAffinePass(); @@ -640,4 +640,4 @@ FunctionPassBase *mlir::createLowerAffinePass() { static PassRegistration pass("lower-affine", - "Lower If, For, AffineApply instructions to primitive equivalents"); + "Lower If, For, AffineApply operations to primitive equivalents"); diff --git a/mlir/lib/Transforms/LowerVectorTransfers.cpp b/mlir/lib/Transforms/LowerVectorTransfers.cpp index 708ad7d1693a..0e5a8680f77c 100644 --- a/mlir/lib/Transforms/LowerVectorTransfers.cpp +++ b/mlir/lib/Transforms/LowerVectorTransfers.cpp @@ -356,12 +356,12 @@ public: explicit VectorTransferExpander(MLIRContext *context) : MLLoweringPattern(VectorTransferOpTy::getOperationName(), 1, context) {} - PatternMatchResult match(Instruction *op) const override { + PatternMatchResult match(Operation *op) const override { if (m_Op().match(op)) return matchSuccess(); return matchFailure(); } - void rewriteOpInst(Instruction *op, MLFuncGlobalLoweringState *funcWiseState, + void rewriteOpInst(Operation *op, MLFuncGlobalLoweringState *funcWiseState, std::unique_ptr opState, MLFuncLoweringRewriter *rewriter) const override { VectorTransferRewriter( diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp index 2a877c456805..7e4a459326f9 100644 --- a/mlir/lib/Transforms/MaterializeVectors.cpp +++ b/mlir/lib/Transforms/MaterializeVectors.cpp @@ -55,7 +55,7 @@ /// to the pass. This pass is thus a partial lowering that opens the "greybox" /// that is the super-vector abstraction. In particular, this pass can turn the /// vector_transfer_read and vector_transfer_write ops in either: -/// 1. a loop nest with either scalar and vector load/store instructions; or +/// 1. a loop nest with either scalar and vector load/store operations; or /// 2. a loop-nest with DmaStartOp / DmaWaitOp; or /// 3. a pre-existing blackbox library call that can be written manually or /// synthesized using search and superoptimization. 
@@ -239,9 +239,9 @@ static SmallVector delinearize(unsigned linearIndex, return res; } -static Instruction *instantiate(FuncBuilder *b, Instruction *opInst, - VectorType hwVectorType, - DenseMap *substitutionsMap); +static Operation *instantiate(FuncBuilder *b, Operation *opInst, + VectorType hwVectorType, + DenseMap *substitutionsMap); /// Not all Values belong to a program slice scoped within the immediately /// enclosing loop. @@ -259,9 +259,8 @@ static Value *substitute(Value *v, VectorType hwVectorType, auto *opInst = v->getDefiningOp(); if (opInst->isa()) { FuncBuilder b(opInst); - auto *inst = instantiate(&b, opInst, hwVectorType, substitutionsMap); - auto res = - substitutionsMap->insert(std::make_pair(v, inst->getResult(0))); + auto *op = instantiate(&b, opInst, hwVectorType, substitutionsMap); + auto res = substitutionsMap->insert(std::make_pair(v, op->getResult(0))); assert(res.second && "Insertion failed"); return res.first->second; } @@ -384,7 +383,7 @@ reindexAffineIndices(FuncBuilder *b, VectorType hwVectorType, /// - constant splat is replaced by constant splat of `hwVectorType`. /// TODO(ntv): add more substitutions on a per-need basis. static SmallVector -materializeAttributes(Instruction *opInst, VectorType hwVectorType) { +materializeAttributes(Operation *opInst, VectorType hwVectorType) { SmallVector res; for (auto a : opInst->getAttrs()) { if (auto splat = a.second.dyn_cast()) { @@ -404,9 +403,9 @@ materializeAttributes(Instruction *opInst, VectorType hwVectorType) { /// substitutionsMap. /// /// If the underlying substitution fails, this fails too and returns nullptr. -static Instruction *instantiate(FuncBuilder *b, Instruction *opInst, - VectorType hwVectorType, - DenseMap *substitutionsMap) { +static Operation *instantiate(FuncBuilder *b, Operation *opInst, + VectorType hwVectorType, + DenseMap *substitutionsMap) { assert(!opInst->isa() && "Should call the function specialized for VectorTransferReadOp"); assert(!opInst->isa() && @@ -481,10 +480,10 @@ static AffineMap projectedPermutationMap(VectorTransferOpTy transfer, /// `hwVectorType` int the covering of the super-vector type. For a more /// detailed description of the problem, see the description of /// reindexAffineIndices. -static Instruction *instantiate(FuncBuilder *b, VectorTransferReadOp read, - VectorType hwVectorType, - ArrayRef hwVectorInstance, - DenseMap *substitutionsMap) { +static Operation *instantiate(FuncBuilder *b, VectorTransferReadOp read, + VectorType hwVectorType, + ArrayRef hwVectorInstance, + DenseMap *substitutionsMap) { SmallVector indices = map(makePtrDynCaster(), read.getIndices()); auto affineIndices = @@ -505,10 +504,10 @@ static Instruction *instantiate(FuncBuilder *b, VectorTransferReadOp read, /// `hwVectorType` int the covering of th3e super-vector type. For a more /// detailed description of the problem, see the description of /// reindexAffineIndices. 
-static Instruction *instantiate(FuncBuilder *b, VectorTransferWriteOp write, - VectorType hwVectorType, - ArrayRef hwVectorInstance, - DenseMap *substitutionsMap) { +static Operation *instantiate(FuncBuilder *b, VectorTransferWriteOp write, + VectorType hwVectorType, + ArrayRef hwVectorInstance, + DenseMap *substitutionsMap) { SmallVector indices = map(makePtrDynCaster(), write.getIndices()); auto affineIndices = @@ -521,11 +520,11 @@ static Instruction *instantiate(FuncBuilder *b, VectorTransferWriteOp write, return cloned.getOperation(); } -/// Returns `true` if inst instance is properly cloned and inserted, false +/// Returns `true` if op instance is properly cloned and inserted, false /// otherwise. /// The multi-dimensional `hwVectorInstance` belongs to the shapeRatio of /// super-vector type to hw vector type. -/// A cloned instance of `inst` is formed as follows: +/// A cloned instance of `op` is formed as follows: /// 1. vector_transfer_read: the return `superVectorType` is replaced by /// `hwVectorType`. Additionally, affine indices are reindexed with /// `reindexAffineIndices` using `hwVectorInstance` and vector type @@ -542,26 +541,26 @@ static Instruction *instantiate(FuncBuilder *b, VectorTransferWriteOp write, /// possible. /// /// Returns true on failure. -static bool instantiateMaterialization(Instruction *inst, +static bool instantiateMaterialization(Operation *op, MaterializationState *state) { - LLVM_DEBUG(dbgs() << "\ninstantiate: " << *inst); + LLVM_DEBUG(dbgs() << "\ninstantiate: " << *op); // Create a builder here for unroll-and-jam effects. - FuncBuilder b(inst); + FuncBuilder b(op); // AffineApplyOp are ignored: instantiating the proper vector op will take // care of AffineApplyOps by composing them properly. - if (inst->isa()) { + if (op->isa()) { return false; } - if (inst->getNumRegions() != 0) - return inst->emitError("NYI path Op with region"); + if (op->getNumRegions() != 0) + return op->emitError("NYI path Op with region"); - if (auto write = inst->dyn_cast()) { + if (auto write = op->dyn_cast()) { auto *clone = instantiate(&b, write, state->hwVectorType, state->hwVectorInstance, state->substitutionsMap); return clone == nullptr; } - if (auto read = inst->dyn_cast()) { + if (auto read = op->dyn_cast()) { auto *clone = instantiate(&b, read, state->hwVectorType, state->hwVectorInstance, state->substitutionsMap); if (!clone) { @@ -574,19 +573,19 @@ static bool instantiateMaterialization(Instruction *inst, // The only op with 0 results reaching this point must, by construction, be // VectorTransferWriteOps and have been caught above. Ops with >= 2 results // are not yet supported. So just support 1 result. 
- if (inst->getNumResults() != 1) { - return inst->emitError("NYI: ops with != 1 results"); + if (op->getNumResults() != 1) { + return op->emitError("NYI: ops with != 1 results"); } - if (inst->getResult(0)->getType() != state->superVectorType) { - return inst->emitError("Op does not return a supervector."); + if (op->getResult(0)->getType() != state->superVectorType) { + return op->emitError("Op does not return a supervector."); } auto *clone = - instantiate(&b, inst, state->hwVectorType, state->substitutionsMap); + instantiate(&b, op, state->hwVectorType, state->substitutionsMap); if (!clone) { return true; } state->substitutionsMap->insert( - std::make_pair(inst->getResult(0), clone->getResult(0))); + std::make_pair(op->getResult(0), clone->getResult(0))); return false; } @@ -612,7 +611,7 @@ static bool instantiateMaterialization(Instruction *inst, /// TODO(ntv): full loops + materialized allocs. /// TODO(ntv): partial unrolling + materialized allocs. static bool emitSlice(MaterializationState *state, - SetVector *slice) { + SetVector *slice) { auto ratio = shapeRatio(state->superVectorType, state->hwVectorType); assert(ratio.hasValue() && "ratio of super-vector to HW-vector shape is not integral"); @@ -627,10 +626,10 @@ static bool emitSlice(MaterializationState *state, DenseMap substitutionMap; scopedState.substitutionsMap = &substitutionMap; // slice are topologically sorted, we can just clone them in order. - for (auto *inst : *slice) { - auto fail = instantiateMaterialization(inst, &scopedState); + for (auto *op : *slice) { + auto fail = instantiateMaterialization(op, &scopedState); if (fail) { - inst->emitError("Unhandled super-vector materialization failure"); + op->emitError("Unhandled super-vector materialization failure"); return true; } } @@ -653,7 +652,7 @@ static bool emitSlice(MaterializationState *state, /// Materializes super-vector types into concrete hw vector types as follows: /// 1. start from super-vector terminators (current vector_transfer_write /// ops); -/// 2. collect all the instructions that can be reached by transitive use-defs +/// 2. collect all the operations that can be reached by transitive use-defs /// chains; /// 3. get the superVectorType for this particular terminator and the /// corresponding hardware vector type (for now limited to F32) @@ -664,14 +663,13 @@ static bool emitSlice(MaterializationState *state, /// Notes /// ===== /// The `slice` is sorted in topological order by construction. -/// Additionally, this set is limited to instructions in the same lexical scope +/// Additionally, this set is limited to operations in the same lexical scope /// because we currently disallow vectorization of defs that come from another /// scope. /// TODO(ntv): please document return value. -static bool materialize(Function *f, - const SetVector &terminators, +static bool materialize(Function *f, const SetVector &terminators, MaterializationState *state) { - DenseSet seen; + DenseSet seen; DominanceInfo domInfo(f); for (auto *term : terminators) { // Short-circuit test, a given terminator may have been reached by some @@ -688,15 +686,15 @@ static bool materialize(Function *f, // Note for the justification of this restriction. // TODO(ntv): relax scoping constraints. 
auto *enclosingScope = term->getParentOp(); - auto keepIfInSameScope = [enclosingScope, &domInfo](Instruction *inst) { - assert(inst && "NULL inst"); + auto keepIfInSameScope = [enclosingScope, &domInfo](Operation *op) { + assert(op && "NULL op"); if (!enclosingScope) { // by construction, everyone is always under the top scope (null scope). return true; } - return domInfo.properlyDominates(enclosingScope, inst); + return domInfo.properlyDominates(enclosingScope, op); }; - SetVector slice = + SetVector slice = getSlice(term, keepIfInSameScope, keepIfInSameScope); assert(!slice.empty()); @@ -749,16 +747,16 @@ void MaterializeVectorsPass::runOnFunction() { // Capture terminators; i.e. vector_transfer_write ops involving a strict // super-vector of subVectorType. - auto filter = [subVectorType](Instruction &inst) { - if (!inst.isa()) { + auto filter = [subVectorType](Operation &op) { + if (!op.isa()) { return false; } - return matcher::operatesOnSuperVectors(inst, subVectorType); + return matcher::operatesOnSuperVectors(op, subVectorType); }; auto pat = Op(filter); SmallVector matches; pat.match(f, &matches); - SetVector terminators; + SetVector terminators; for (auto m : matches) { terminators.insert(m.getMatchedOperation()); } diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp index 9779ab78a3fe..a579d4393689 100644 --- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp +++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp @@ -54,8 +54,8 @@ namespace { // iteration of the innermost loop enclosing both the store op and the load op. // // (* A dependence being satisfied at a block: a dependence that is satisfied by -// virtue of the destination instruction appearing textually / lexically after -// the source instruction within the body of a 'affine.for' instruction; thus, a +// virtue of the destination operation appearing textually / lexically after +// the source operation within the body of a 'affine.for' operation; thus, a // dependence is always either satisfied by a loop or by a block). // // The above conditions are simple to check, sufficient, and powerful for most @@ -77,7 +77,7 @@ struct MemRefDataFlowOpt : public FunctionPass { // A list of memref's that are potentially dead / could be eliminated. SmallPtrSet memrefsToErase; // Load op's whose results were replaced by those forwarded from stores. - std::vector loadOpsToErase; + std::vector loadOpsToErase; DominanceInfo *domInfo = nullptr; PostDominanceInfo *postDomInfo = nullptr; @@ -94,13 +94,13 @@ FunctionPassBase *mlir::createMemRefDataFlowOptPass() { // This is a straightforward implementation not optimized for speed. Optimize // this in the future if needed. void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) { - Instruction *lastWriteStoreOp = nullptr; - Instruction *loadOpInst = loadOp.getOperation(); + Operation *lastWriteStoreOp = nullptr; + Operation *loadOpInst = loadOp.getOperation(); // First pass over the use list to get minimum number of surrounding // loops common between the load op and the store op, with min taken across // all store ops. - SmallVector storeOps; + SmallVector storeOps; unsigned minSurroundingLoops = getNestingDepth(*loadOpInst); for (InstOperand &use : loadOp.getMemRef()->getUses()) { auto storeOp = use.getOwner()->dyn_cast(); @@ -119,11 +119,11 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) { // and loadOp. // The list of store op candidates for forwarding - need to satisfy the // conditions listed at the top. 
- SmallVector fwdingCandidates; + SmallVector fwdingCandidates; // Store ops that have a dependence into the load (even if they aren't // forwarding candidates). Each forwarding candidate will be checked for a // post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores. - SmallVector depSrcStores; + SmallVector depSrcStores; for (auto *storeOpInst : storeOps) { MemRefAccess srcAccess(storeOpInst); MemRefAccess destAccess(loadOpInst); @@ -186,7 +186,7 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) { // that postdominates all 'depSrcStores' (if such a store exists) is the // unique store providing the value to the load, i.e., provably the last // writer to that memref loc. - if (llvm::all_of(depSrcStores, [&](Instruction *depStore) { + if (llvm::all_of(depSrcStores, [&](Operation *depStore) { return postDomInfo->postDominates(storeOpInst, depStore); })) { lastWriteStoreOp = storeOpInst; @@ -236,9 +236,9 @@ void MemRefDataFlowOpt::runOnFunction() { // to do this as well, but we'll do it here since we collected these anyway. for (auto *memref : memrefsToErase) { // If the memref hasn't been alloc'ed in this function, skip. - Instruction *defInst = memref->getDefiningOp(); + Operation *defInst = memref->getDefiningOp(); if (!defInst || !defInst->isa()) - // TODO(mlir-team): if the memref was returned by a 'call' instruction, we + // TODO(mlir-team): if the memref was returned by a 'call' operation, we // could still erase it if the call had no side-effects. continue; if (std::any_of(memref->use_begin(), memref->use_end(), diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp index a7d37161aa17..667aad2f79db 100644 --- a/mlir/lib/Transforms/PipelineDataTransfer.cpp +++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp @@ -53,23 +53,23 @@ FunctionPassBase *mlir::createPipelineDataTransferPass() { return new PipelineDataTransfer(); } -// Returns the position of the tag memref operand given a DMA instruction. +// Returns the position of the tag memref operand given a DMA operation. // Temporary utility: will be replaced when DmaStart/DmaFinish abstract op's are // added. TODO(b/117228571) -static unsigned getTagMemRefPos(Instruction &dmaInst) { +static unsigned getTagMemRefPos(Operation &dmaInst) { assert(dmaInst.isa() || dmaInst.isa()); if (dmaInst.isa()) { // Second to last operand. return dmaInst.getNumOperands() - 2; } - // First operand for a dma finish instruction. + // First operand for a dma finish operation. return 0; } /// Doubles the buffer of the supplied memref on the specified 'affine.for' -/// instruction by adding a leading dimension of size two to the memref. +/// operation by adding a leading dimension of size two to the memref. /// Replaces all uses of the old memref by the new one while indexing the newly -/// added dimension by the loop IV of the specified 'affine.for' instruction +/// added dimension by the loop IV of the specified 'affine.for' operation /// modulo 2. Returns false if such a replacement cannot be performed. static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { auto *forBody = forOp.getBody(); @@ -104,7 +104,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { dynamicDimCount++)); } - // Create and place the alloc right before the 'affine.for' instruction. + // Create and place the alloc right before the 'affine.for' operation. 
Value *newMemRef = bOuter.create(forInst->getLoc(), newMemRefType, allocOperands); @@ -139,7 +139,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { /// Returns success if the IR is in a valid state. void PipelineDataTransfer::runOnFunction() { // Do a post order walk so that inner loop DMAs are processed first. This is - // necessary since 'affine.for' instructions nested within would otherwise + // necessary since 'affine.for' operations nested within would otherwise // become invalid (erased) when the outer loop is pipelined (the pipelined one // gets deleted and replaced by a prologue, a new steady-state loop and an // epilogue). @@ -173,27 +173,27 @@ static bool checkTagMatch(DmaStartOp startOp, DmaWaitOp waitOp) { return true; } -// Identify matching DMA start/finish instructions to overlap computation with. +// Identify matching DMA start/finish operations to overlap computation with. static void findMatchingStartFinishInsts( AffineForOp forOp, - SmallVectorImpl> &startWaitPairs) { + SmallVectorImpl> &startWaitPairs) { - // Collect outgoing DMA instructions - needed to check for dependences below. + // Collect outgoing DMA operations - needed to check for dependences below. SmallVector outgoingDmaOps; - for (auto &inst : *forOp.getBody()) { - auto dmaStartOp = inst.dyn_cast(); + for (auto &op : *forOp.getBody()) { + auto dmaStartOp = op.dyn_cast(); if (dmaStartOp && dmaStartOp.isSrcMemorySpaceFaster()) outgoingDmaOps.push_back(dmaStartOp); } - SmallVector dmaStartInsts, dmaFinishInsts; - for (auto &inst : *forOp.getBody()) { - // Collect DMA finish instructions. - if (inst.isa()) { - dmaFinishInsts.push_back(&inst); + SmallVector dmaStartInsts, dmaFinishInsts; + for (auto &op : *forOp.getBody()) { + // Collect DMA finish operations. + if (op.isa()) { + dmaFinishInsts.push_back(&op); continue; } - auto dmaStartOp = inst.dyn_cast(); + auto dmaStartOp = op.dyn_cast(); if (!dmaStartOp) continue; @@ -228,10 +228,10 @@ static void findMatchingStartFinishInsts( } } if (!escapingUses) - dmaStartInsts.push_back(&inst); + dmaStartInsts.push_back(&op); } - // For each start instruction, we look for a matching finish instruction. + // For each start operation, we look for a matching finish operation. for (auto *dmaStartInst : dmaStartInsts) { for (auto *dmaFinishInst : dmaFinishInsts) { if (checkTagMatch(dmaStartInst->cast(), @@ -253,7 +253,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) { return; } - SmallVector, 4> startWaitPairs; + SmallVector, 4> startWaitPairs; findMatchingStartFinishInsts(forOp, startWaitPairs); if (startWaitPairs.empty()) { @@ -263,7 +263,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) { // Double the buffers for the higher memory space memref's. // Identify memref's to replace by scanning through all DMA start - // instructions. A DMA start instruction has two memref's - the one from the + // operations. A DMA start operation has two memref's - the one from the // higher level of memory hierarchy is the one to double buffer. // TODO(bondhugula): check whether double-buffering is even necessary. // TODO(bondhugula): make this work with different layouts: assuming here that @@ -320,13 +320,13 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) { startWaitPairs.clear(); findMatchingStartFinishInsts(forOp, startWaitPairs); - // Store shift for instruction for later lookup for AffineApplyOp's. - DenseMap instShiftMap; + // Store shift for operation for later lookup for AffineApplyOp's. 
+ DenseMap instShiftMap; for (auto &pair : startWaitPairs) { auto *dmaStartInst = pair.first; assert(dmaStartInst->isa()); instShiftMap[dmaStartInst] = 0; - // Set shifts for DMA start inst's affine operand computation slices to 0. + // Set shifts for DMA start op's affine operand computation slices to 0. SmallVector sliceOps; mlir::createAffineComputationSlice(dmaStartInst, &sliceOps); if (!sliceOps.empty()) { @@ -336,32 +336,32 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) { } else { // If a slice wasn't created, the reachable affine.apply op's from its // operands are the ones that go with it. - SmallVector affineApplyInsts; + SmallVector affineApplyInsts; SmallVector operands(dmaStartInst->getOperands()); getReachableAffineApplyOps(operands, affineApplyInsts); - for (auto *inst : affineApplyInsts) { - instShiftMap[inst] = 0; + for (auto *op : affineApplyInsts) { + instShiftMap[op] = 0; } } } // Everything else (including compute ops and dma finish) are shifted by one. - for (auto &inst : *forOp.getBody()) { - if (instShiftMap.find(&inst) == instShiftMap.end()) { - instShiftMap[&inst] = 1; + for (auto &op : *forOp.getBody()) { + if (instShiftMap.find(&op) == instShiftMap.end()) { + instShiftMap[&op] = 1; } } // Get shifts stored in map. std::vector shifts(forOp.getBody()->getOperations().size()); unsigned s = 0; - for (auto &inst : *forOp.getBody()) { - assert(instShiftMap.find(&inst) != instShiftMap.end()); - shifts[s++] = instShiftMap[&inst]; + for (auto &op : *forOp.getBody()) { + assert(instShiftMap.find(&op) != instShiftMap.end()); + shifts[s++] = instShiftMap[&op]; - // Tagging instructions with shifts for debugging purposes. + // Tagging operations with shifts for debugging purposes. LLVM_DEBUG({ - FuncBuilder b(&inst); - inst.setAttr("shift", b.getI64IntegerAttr(shifts[s - 1])); + FuncBuilder b(&op); + op.setAttr("shift", b.getI64IntegerAttr(shifts[s - 1])); }); } @@ -372,7 +372,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) { } if (failed(instBodySkew(forOp, shifts))) { - LLVM_DEBUG(llvm::dbgs() << "inst body skewing failed - unexpected\n";); + LLVM_DEBUG(llvm::dbgs() << "op body skewing failed - unexpected\n";); return; } } diff --git a/mlir/lib/Transforms/SimplifyAffineStructures.cpp b/mlir/lib/Transforms/SimplifyAffineStructures.cpp index 4ff5367abbb2..e777a4d9ca36 100644 --- a/mlir/lib/Transforms/SimplifyAffineStructures.cpp +++ b/mlir/lib/Transforms/SimplifyAffineStructures.cpp @@ -32,7 +32,7 @@ using namespace mlir; namespace { -/// Simplifies all affine expressions appearing in the operation instructions of +/// Simplifies all affine expressions appearing in the operations of /// the Function. This is mainly to test the simplifyAffineExpr method. /// TODO(someone): This should just be defined as a canonicalization pattern /// on AffineMap and driven from the existing canonicalization pass. @@ -41,9 +41,9 @@ struct SimplifyAffineStructures void runOnFunction() override; /// Utility to simplify an affine attribute and update its entry in the parent - /// instruction if necessary. + /// operation if necessary. template - void simplifyAndUpdateAttribute(Instruction *inst, Identifier name, + void simplifyAndUpdateAttribute(Operation *op, Identifier name, AttributeT attr) { auto &simplified = simplifiedAttributes[attr]; if (simplified == attr) @@ -62,7 +62,7 @@ struct SimplifyAffineStructures } // Simplification was successful, so update the attribute. 
- inst->setAttr(name, simplified); + op->setAttr(name, simplified); } /// Performs basic integer set simplifications. Checks if it's empty, and @@ -93,7 +93,7 @@ FunctionPassBase *mlir::createSimplifyAffineStructuresPass() { void SimplifyAffineStructures::runOnFunction() { simplifiedAttributes.clear(); - getFunction().walk([&](Instruction *opInst) { + getFunction().walk([&](Operation *opInst) { for (auto attr : opInst->getAttrs()) { if (auto mapAttr = attr.second.dyn_cast()) simplifyAndUpdateAttribute(opInst, attr.first, mapAttr); diff --git a/mlir/lib/Transforms/StripDebugInfo.cpp b/mlir/lib/Transforms/StripDebugInfo.cpp index 9d6b7a0ba272..1691976a05a1 100644 --- a/mlir/lib/Transforms/StripDebugInfo.cpp +++ b/mlir/lib/Transforms/StripDebugInfo.cpp @@ -32,9 +32,9 @@ void StripDebugInfo::runOnFunction() { Function &func = getFunction(); UnknownLoc unknownLoc = UnknownLoc::get(&getContext()); - // Strip the debug info from the function and its instructions. + // Strip the debug info from the function and its operations. func.setLoc(unknownLoc); - func.walk([&](Instruction *inst) { inst->setLoc(unknownLoc); }); + func.walk([&](Operation *op) { op->setLoc(unknownLoc); }); } /// Creates a pass to strip debug information from a function. @@ -43,4 +43,4 @@ FunctionPassBase *mlir::createStripDebugInfoPass() { } static PassRegistration - pass("strip-debuginfo", "Strip debug info from functions and instructions"); + pass("strip-debuginfo", "Strip debug info from functions and operations"); diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp index 79a2b12d242f..b01b8dba5988 100644 --- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp +++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp @@ -39,13 +39,13 @@ public: worklist.reserve(64); // Add all operations to the worklist. - fn.walk([&](Instruction *inst) { addToWorklist(inst); }); + fn.walk([&](Operation *op) { addToWorklist(op); }); } /// Perform the rewrites. void simplifyFunction(); - void addToWorklist(Instruction *op) { + void addToWorklist(Operation *op) { // Check to see if the worklist already contains this op. if (worklistMap.count(op)) return; @@ -54,7 +54,7 @@ public: worklist.push_back(op); } - Instruction *popFromWorklist() { + Operation *popFromWorklist() { auto *op = worklist.back(); worklist.pop_back(); @@ -66,7 +66,7 @@ public: /// If the specified operation is in the worklist, remove it. If not, this is /// a no-op. - void removeFromWorklist(Instruction *op) { + void removeFromWorklist(Operation *op) { auto it = worklistMap.find(op); if (it != worklistMap.end()) { assert(worklist[it->second] == op && "malformed worklist data structure"); @@ -78,7 +78,7 @@ public: protected: // Implement the hook for creating operations, and make sure that newly // created ops are added to the worklist for processing. - Instruction *createOperation(const OperationState &state) override { + Operation *createOperation(const OperationState &state) override { auto *result = builder.createOperation(state); addToWorklist(result); return result; @@ -86,7 +86,7 @@ protected: // If an operation is about to be removed, make sure it is not in our // worklist anymore because we'd get dangling references to it. 
- void notifyOperationRemoved(Instruction *op) override { + void notifyOperationRemoved(Operation *op) override { addToWorklist(op->getOperands()); removeFromWorklist(op); } @@ -94,7 +94,7 @@ protected: // When the root of a pattern is about to be replaced, it can trigger // simplifications to its users - make sure to add them to the worklist // before the root is changed. - void notifyRootReplaced(Instruction *op) override { + void notifyRootReplaced(Operation *op) override { for (auto *result : op->getResults()) // TODO: Add a result->getUsers() iterator. for (auto &user : result->getUses()) @@ -102,15 +102,15 @@ protected: } private: - // Look over the provided operands for any defining instructions that should + // Look over the provided operands for any defining operations that should // be re-added to the worklist. This function should be called when an // operation is modified or removed, as it may trigger further // simplifications. template void addToWorklist(Operands &&operands) { for (Value *operand : operands) { // If the use count of this operand is now < 2, we re-add the defining - // instruction to the worklist. - // TODO(riverriddle) This is based on the fact that zero use instructions + // operation to the worklist. + // TODO(riverriddle) This is based on the fact that zero use operations // may be deleted, and that single use values often have more // canonicalization opportunities. if (!operand->use_empty() && @@ -131,13 +131,13 @@ private: /// need to be revisited, plus their index in the worklist. This allows us to /// efficiently remove operations from the worklist when they are erased from /// the function, even if they aren't the root of a pattern. - std::vector worklist; - DenseMap worklistMap; + std::vector worklist; + DenseMap worklistMap; /// As part of canonicalization, we move constants to the top of the entry /// block of the current function and de-duplicate them. This keeps track of /// constants we have done this for. - DenseMap, Instruction *> uniquedConstants; + DenseMap, Operation *> uniquedConstants; }; }; // end anonymous namespace @@ -199,7 +199,7 @@ void GreedyPatternRewriteDriver::simplifyFunction() { continue; } - // Check to see if any operands to the instruction is constant and whether + // Check to see if any operands to the operation is constant and whether // the operation knows how to constant fold itself. operandConstants.assign(op->getNumOperands(), Attribute()); for (unsigned i = 0, e = op->getNumOperands(); i != e; ++i) diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index 2760e8b8bd3d..0f962657fad0 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -123,10 +123,10 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) { // Replaces all IV uses to its single iteration value. 
auto *iv = forOp.getInductionVar(); - Instruction *forInst = forOp.getOperation(); + Operation *op = forOp.getOperation(); if (!iv->use_empty()) { if (forOp.hasConstantLowerBound()) { - auto *mlFunc = forInst->getFunction(); + auto *mlFunc = op->getFunction(); FuncBuilder topBuilder(mlFunc); auto constOp = topBuilder.create( forOp.getLoc(), forOp.getConstantLowerBound()); @@ -134,28 +134,28 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) { } else { AffineBound lb = forOp.getLowerBound(); SmallVector lbOperands(lb.operand_begin(), lb.operand_end()); - FuncBuilder builder(forInst->getBlock(), Block::iterator(forInst)); + FuncBuilder builder(op->getBlock(), Block::iterator(op)); if (lb.getMap() == builder.getDimIdentityMap()) { // No need of generating an affine.apply. iv->replaceAllUsesWith(lbOperands[0]); } else { auto affineApplyOp = builder.create( - forInst->getLoc(), lb.getMap(), lbOperands); + op->getLoc(), lb.getMap(), lbOperands); iv->replaceAllUsesWith(affineApplyOp); } } } - // Move the loop body instructions, except for terminator, to the loop's + // Move the loop body operations, except for terminator, to the loop's // containing block. - auto *block = forInst->getBlock(); + auto *block = op->getBlock(); forOp.getBody()->getOperations().back().erase(); - block->getOperations().splice(Block::iterator(forInst), + block->getOperations().splice(Block::iterator(op), forOp.getBody()->getOperations()); forOp.erase(); return success(); } -/// Promotes all single iteration for inst's in the Function, i.e., moves +/// Promotes all single iteration for op's in the Function, i.e., moves /// their body into the containing Block. void mlir::promoteSingleIterationLoops(Function *f) { // Gathers all innermost loops through a post order pruned walk. @@ -163,16 +163,16 @@ void mlir::promoteSingleIterationLoops(Function *f) { [](AffineForOp forOp) { promoteIfSingleIteration(forOp); }); } -/// Generates a 'affine.for' inst with the specified lower and upper bounds -/// while generating the right IV remappings for the shifted instructions. The -/// instruction blocks that go into the loop are specified in instGroupQueue +/// Generates a 'affine.for' op with the specified lower and upper bounds +/// while generating the right IV remappings for the shifted operations. The +/// operation blocks that go into the loop are specified in instGroupQueue /// starting from the specified offset, and in that order; the first element of -/// the pair specifies the shift applied to that group of instructions; note +/// the pair specifies the shift applied to that group of operations; note /// that the shift is multiplied by the loop step before being applied. Returns /// nullptr if the generated loop simplifies to a single iteration one. static AffineForOp generateLoop(AffineMap lbMap, AffineMap ubMap, - const std::vector>> + const std::vector>> &instGroupQueue, unsigned offset, AffineForOp srcForInst, FuncBuilder *b) { SmallVector lbOperands(srcForInst.getLowerBoundOperands()); @@ -194,8 +194,8 @@ generateLoop(AffineMap lbMap, AffineMap ubMap, it != e; ++it) { uint64_t shift = it->first; auto insts = it->second; - // All 'same shift' instructions get added with their operands being - // remapped to results of cloned instructions, and their IV used remapped. + // All 'same shift' operations get added with their operands being + // remapped to results of cloned operations, and their IV used remapped. // Generate the remapping if the shift is not zero: remappedIV = newIV - // shift. 
if (!srcIV->use_empty() && shift != 0) { @@ -208,9 +208,9 @@ generateLoop(AffineMap lbMap, AffineMap ubMap, } else { operandMap.map(srcIV, loopChunkIV); } - for (auto *inst : insts) { - if (!inst->isa()) - bodyBuilder.clone(*inst, operandMap); + for (auto *op : insts) { + if (!op->isa()) + bodyBuilder.clone(*op, operandMap); } }; if (succeeded(promoteIfSingleIteration(loopChunk))) @@ -218,17 +218,17 @@ generateLoop(AffineMap lbMap, AffineMap ubMap, return loopChunk; } -/// Skew the instructions in the body of a 'affine.for' instruction with the -/// specified instruction-wise shifts. The shifts are with respect to the +/// Skew the operations in the body of a 'affine.for' operation with the +/// specified operation-wise shifts. The shifts are with respect to the /// original execution order, and are multiplied by the loop 'step' before being -/// applied. A shift of zero for each instruction will lead to no change. -// The skewing of instructions with respect to one another can be used for +/// applied. A shift of zero for each operation will lead to no change. +// The skewing of operations with respect to one another can be used for // example to allow overlap of asynchronous operations (such as DMA -// communication) with computation, or just relative shifting of instructions +// communication) with computation, or just relative shifting of operations // for better register reuse, locality or parallelism. As such, the shifts are -// typically expected to be at most of the order of the number of instructions. +// typically expected to be at most of the order of the number of operations. // This method should not be used as a substitute for loop distribution/fission. -// This method uses an algorithm// in time linear in the number of instructions +// This method uses an algorithm// in time linear in the number of operations // in the body of the for loop - (using the 'sweep line' paradigm). This method // asserts preservation of SSA dominance. A check for that as well as that for // memory-based depedence preservation check rests with the users of this @@ -267,14 +267,14 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef shifts, return success(); } - // An array of instruction groups sorted by shift amount; each group has all - // instructions with the same shift in the order in which they appear in the - // body of the 'affine.for' inst. - std::vector> sortedInstGroups(maxShift + 1); + // An array of operation groups sorted by shift amount; each group has all + // operations with the same shift in the order in which they appear in the + // body of the 'affine.for' op. + std::vector> sortedInstGroups(maxShift + 1); unsigned pos = 0; - for (auto &inst : *forOp.getBody()) { + for (auto &op : *forOp.getBody()) { auto shift = shifts[pos++]; - sortedInstGroups[shift].push_back(&inst); + sortedInstGroups[shift].push_back(&op); } // Unless the shifts have a specific pattern (which actually would be the @@ -287,8 +287,8 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef shifts, // Do a sweep over the sorted shifts while storing open groups in a // vector, and generating loop portions as necessary during the sweep. A block - // of instructions is paired with its shift. - std::vector>> instGroupQueue; + // of operations is paired with its shift. 
+ std::vector>> instGroupQueue; auto origLbMap = forOp.getLowerBoundMap(); uint64_t lbShift = 0; @@ -302,14 +302,14 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef shifts, "Queue expected to be empty when the first block is found"); // The interval for which the loop needs to be generated here is: // [lbShift, min(lbShift + tripCount, d)) and the body of the - // loop needs to have all instructions in instQueue in that order. + // loop needs to have all operations in instQueue in that order. AffineForOp res; if (lbShift + tripCount * step < d * step) { res = generateLoop( b.getShiftedAffineMap(origLbMap, lbShift), b.getShiftedAffineMap(origLbMap, lbShift + tripCount * step), instGroupQueue, 0, forOp, &b); - // Entire loop for the queued inst groups generated, empty it. + // Entire loop for the queued op groups generated, empty it. instGroupQueue.clear(); lbShift += tripCount * step; } else { @@ -325,11 +325,11 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef shifts, // Start of first interval. lbShift = d * step; } - // Augment the list of instructions that get into the current open interval. + // Augment the list of operations that get into the current open interval. instGroupQueue.push_back({d, sortedInstGroups[d]}); } - // Those instructions groups left in the queue now need to be processed (FIFO) + // Those operations groups left in the queue now need to be processed (FIFO) // and their loops completed. for (unsigned i = 0, e = instGroupQueue.size(); i < e; ++i) { uint64_t ubShift = (instGroupQueue[i].first + tripCount) * step; @@ -341,7 +341,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef shifts, prologue = epilogue; } - // Erase the original for inst. + // Erase the original for op. forOp.erase(); if (unrollPrologueEpilogue && prologue) @@ -407,10 +407,10 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp, return failure(); // Generate the cleanup loop if trip count isn't a multiple of unrollFactor. - Instruction *forInst = forOp.getOperation(); + Operation *op = forOp.getOperation(); if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) { - FuncBuilder builder(forInst->getBlock(), ++Block::iterator(forInst)); - auto cleanupForInst = builder.clone(*forInst)->cast(); + FuncBuilder builder(op->getBlock(), ++Block::iterator(op)); + auto cleanupForInst = builder.clone(*op)->cast(); AffineMap cleanupMap; SmallVector cleanupOperands; getCleanupLoopLowerBound(forOp, unrollFactor, &cleanupMap, &cleanupOperands, @@ -435,7 +435,7 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp, // 'forOp'. FuncBuilder builder = forOp.getBodyBuilder(); - // Keep a pointer to the last non-terminator instruction in the original block + // Keep a pointer to the last non-terminator operation in the original block // so that we know what to clone (since we are doing this in-place). Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2); @@ -530,17 +530,17 @@ static void cloneLoopBodyInto(AffineForOp forOp, Value *oldIv, BlockAndValueMapping map; map.map(oldIv, newForOp.getInductionVar()); FuncBuilder b = newForOp.getBodyBuilder(); - for (auto &inst : *forOp.getBody()) { + for (auto &op : *forOp.getBody()) { // Step over newForOp in case it is nested under forOp. 
- if (&inst == newForOp.getOperation()) { + if (&op == newForOp.getOperation()) { continue; } - if (inst.isa()) { + if (op.isa()) { continue; } - auto *instClone = b.clone(inst, map); + auto *instClone = b.clone(op, map); unsigned idx = 0; - for (auto r : inst.getResults()) { + for (auto r : op.getResults()) { // Since we do a forward pass over the body, we iteratively augment // the `map` with everything we clone. map.map(r, instClone->getResult(idx++)); @@ -567,8 +567,8 @@ stripmineSink(AffineForOp forOp, uint64_t factor, auto scaledStep = originalStep * factor; forOp.setStep(scaledStep); - auto *forInst = forOp.getOperation(); - FuncBuilder b(forInst->getBlock(), ++Block::iterator(forInst)); + auto *op = forOp.getOperation(); + FuncBuilder b(op->getBlock(), ++Block::iterator(op)); // Lower-bound map creation. auto lbMap = forOp.getLowerBoundMap(); @@ -588,11 +588,11 @@ stripmineSink(AffineForOp forOp, uint64_t factor, auto newForOp = b.create(t.getLoc(), lbOperands, lbMap, ubOperands, ubMap, originalStep); cloneLoopBodyInto(t, forOp.getInductionVar(), newForOp); - // Remove all instructions from `t` except `newForOp`. + // Remove all operations from `t` except `newForOp`. auto rit = ++newForOp.getOperation()->getReverseIterator(); auto re = t.getBody()->rend(); - for (auto &inst : llvm::make_early_inc_range(llvm::make_range(rit, re))) { - inst.erase(); + for (auto &op : llvm::make_early_inc_range(llvm::make_range(rit, re))) { + op.erase(); } innerLoops.push_back(newForOp); } diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp index b5225d08827b..422d6b136ab3 100644 --- a/mlir/lib/Transforms/Utils/Utils.cpp +++ b/mlir/lib/Transforms/Utils/Utils.cpp @@ -37,7 +37,7 @@ using namespace mlir; /// Return true if this operation dereferences one or more memref's. // Temporary utility: will be replaced when this is modeled through // side-effects/op traits. TODO(b/117228571) -static bool isMemRefDereferencingOp(Instruction &op) { +static bool isMemRefDereferencingOp(Operation &op) { if (op.isa() || op.isa() || op.isa() || op.isa()) return true; @@ -48,8 +48,8 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, ArrayRef extraIndices, AffineMap indexRemap, ArrayRef extraOperands, - Instruction *domInstFilter, - Instruction *postDomInstFilter) { + Operation *domInstFilter, + Operation *postDomInstFilter) { unsigned newMemRefRank = newMemRef->getType().cast().getRank(); (void)newMemRefRank; // unused in opt mode unsigned oldMemRefRank = oldMemRef->getType().cast().getRank(); @@ -76,7 +76,7 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, llvm::make_unique(postDomInstFilter->getFunction()); // The ops where memref replacement succeeds are replaced with new ones. - SmallVector opsToErase; + SmallVector opsToErase; // Walk all uses of old memref. Operation using the memref gets replaced. for (auto &use : llvm::make_early_inc_range(oldMemRef->getUses())) { @@ -115,7 +115,7 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, }; unsigned memRefOperandPos = getMemRefOperandPos(); - // Construct the new operation instruction using this memref. + // Construct the new operation using this memref. 
OperationState state(opInst->getContext(), opInst->getLoc(), opInst->getName()); state.setOperandListToResizable(opInst->hasResizableOperandsList()); @@ -192,9 +192,9 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, return true; } -/// Given an operation instruction, inserts one or more single result affine -/// apply operations, results of which are exclusively used by this operation -/// instruction. The operands of these newly created affine apply ops are +/// Given an operation, inserts one or more single result affine apply +/// operations, results of which are exclusively used by this operation. The +/// operands of these newly created affine apply ops are /// guaranteed to be loop iterators or terminal symbols of a function. /// /// Before @@ -221,7 +221,7 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, /// uses besides this opInst; otherwise returns the list of affine.apply /// operations created in output argument `sliceOps`. void mlir::createAffineComputationSlice( - Instruction *opInst, SmallVectorImpl *sliceOps) { + Operation *opInst, SmallVectorImpl *sliceOps) { // Collect all operands that are results of affine apply ops. SmallVector subOperands; subOperands.reserve(opInst->getNumOperands()); @@ -233,13 +233,13 @@ void mlir::createAffineComputationSlice( } // Gather sequence of AffineApplyOps reachable from 'subOperands'. - SmallVector affineApplyOps; + SmallVector affineApplyOps; getReachableAffineApplyOps(subOperands, affineApplyOps); // Skip transforming if there are no affine maps to compose. if (affineApplyOps.empty()) return; - // Check if all uses of the affine apply op's lie only in this op inst, in + // Check if all uses of the affine apply op's lie only in this op, in // which case there would be nothing to do. bool localized = true; for (auto *op : affineApplyOps) { @@ -291,7 +291,7 @@ void mlir::createAffineComputationSlice( } void mlir::remapFunctionAttrs( - Instruction &op, const DenseMap &remappingTable) { + Operation &op, const DenseMap &remappingTable) { for (auto attr : op.getAttrs()) { // Do the remapping, if we got the same thing back, then it must contain // functions that aren't getting remapped. @@ -310,9 +310,8 @@ void mlir::remapFunctionAttrs( void mlir::remapFunctionAttrs( Function &fn, const DenseMap &remappingTable) { - // Look at all instructions in a Function. - fn.walk( - [&](Instruction *inst) { remapFunctionAttrs(*inst, remappingTable); }); + // Look at all operations in a Function. + fn.walk([&](Operation *op) { remapFunctionAttrs(*op, remappingTable); }); } void mlir::remapFunctionAttrs( diff --git a/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp b/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp index 5590dbad7f11..cf0684ef90d9 100644 --- a/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp +++ b/mlir/lib/Transforms/Vectorization/VectorizerTestPass.cpp @@ -77,7 +77,7 @@ static llvm::cl::opt clTestNormalizeMaps( llvm::cl::desc( "Enable testing the normalization of AffineAffineApplyOp " "where each AffineAffineApplyOp in the composition is a single output " - "instruction."), + "operation."), llvm::cl::cat(clOptionsCategory)); namespace { @@ -104,16 +104,16 @@ void VectorizerTestPass::testVectorShapeRatio() { clTestVectorShapeRatio.end()); auto subVectorType = VectorType::get(shape, FloatType::getF32(f->getContext())); - // Only filter instructions that operate on a strict super-vector and have one + // Only filter operations that operate on a strict super-vector and have one // return. This makes testing easier.
- auto filter = [subVectorType](Instruction &inst) { + auto filter = [subVectorType](Operation &op) { assert(subVectorType.getElementType() == FloatType::getF32(subVectorType.getContext()) && "Only f32 supported for now"); - if (!matcher::operatesOnSuperVectors(inst, subVectorType)) { + if (!matcher::operatesOnSuperVectors(op, subVectorType)) { return false; } - if (inst.getNumResults() != 1) { + if (op.getNumResults() != 1) { return false; } return true; @@ -138,10 +138,10 @@ void VectorizerTestPass::testVectorShapeRatio() { } } -static std::string toString(Instruction *inst) { +static std::string toString(Operation *op) { std::string res; llvm::raw_string_ostream os(res); - inst->print(os); + op->print(os); return res; } @@ -150,9 +150,9 @@ static NestedPattern patternTestSlicingOps() { constexpr auto kTestSlicingOpName = "slicing-test-op"; using functional::map; using matcher::Op; - // Match all OpInstructions with the kTestSlicingOpName name. - auto filter = [](Instruction &inst) { - return inst.getName().getStringRef() == kTestSlicingOpName; + // Match all operations with the kTestSlicingOpName name. + auto filter = [](Operation &op) { + return op.getName().getStringRef() == kTestSlicingOpName; }; return Op(filter); } @@ -163,7 +163,7 @@ void VectorizerTestPass::testBackwardSlicing() { SmallVector matches; patternTestSlicingOps().match(f, &matches); for (auto m : matches) { - SetVector backwardSlice; + SetVector backwardSlice; getBackwardSlice(m.getMatchedOperation(), &backwardSlice); auto strs = map(toString, backwardSlice); outs() << "\nmatched: " << *m.getMatchedOperation() @@ -179,7 +179,7 @@ void VectorizerTestPass::testForwardSlicing() { SmallVector matches; patternTestSlicingOps().match(f, &matches); for (auto m : matches) { - SetVector forwardSlice; + SetVector forwardSlice; getForwardSlice(m.getMatchedOperation(), &forwardSlice); auto strs = map(toString, forwardSlice); outs() << "\nmatched: " << *m.getMatchedOperation() @@ -196,7 +196,7 @@ void VectorizerTestPass::testSlicing() { SmallVector matches; patternTestSlicingOps().match(f, &matches); for (auto m : matches) { - SetVector staticSlice = getSlice(m.getMatchedOperation()); + SetVector staticSlice = getSlice(m.getMatchedOperation()); auto strs = map(toString, staticSlice); outs() << "\nmatched: " << *m.getMatchedOperation() << " static slice: "; for (const auto &s : strs) { @@ -205,8 +205,8 @@ void VectorizerTestPass::testSlicing() { } } -static bool customOpWithAffineMapAttribute(Instruction &inst) { - return inst.getName().getStringRef() == +static bool customOpWithAffineMapAttribute(Operation &op) { + return op.getName().getStringRef() == VectorizerTestPass::kTestAffineMapOpName; } @@ -233,12 +233,10 @@ void VectorizerTestPass::testComposeMaps() { simplifyAffineMap(res).print(outs() << "\nComposed map: "); } -static bool affineApplyOp(Instruction &inst) { - return inst.isa(); -} +static bool affineApplyOp(Operation &op) { return op.isa(); } -static bool singleResultAffineApplyOpWithoutUses(Instruction &inst) { - auto app = inst.dyn_cast(); +static bool singleResultAffineApplyOpWithoutUses(Operation &op) { + auto app = op.dyn_cast(); return app && app.use_empty(); } diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp index 8a7a7a6dbba2..98e4053c6336 100644 --- a/mlir/lib/Transforms/Vectorize.cpp +++ b/mlir/lib/Transforms/Vectorize.cpp @@ -166,7 +166,7 @@ using namespace mlir; /// references along fastest varying dimensions and loops with recursive nested /// patterns capturing 
imperfectly-nested loop nests; the SLP vectorizer, on /// the other hand, performs flat pattern matching inside a single unrolled loop -/// body and stitches together pieces of load and store instructions into full +/// body and stitches together pieces of load and store operations into full /// 1-D vectors. We envision that the SLP vectorizer is a good way to capture /// innermost loop, control-flow dependent patterns that super-vectorization may /// not be able to capture easily. In other words, super-vectorization does not @@ -662,13 +662,12 @@ namespace { struct VectorizationStrategy { SmallVector vectorSizes; - DenseMap loopToVectorDim; + DenseMap loopToVectorDim; }; } // end anonymous namespace -static void vectorizeLoopIfProfitable(Instruction *loop, - unsigned depthInPattern, +static void vectorizeLoopIfProfitable(Operation *loop, unsigned depthInPattern, unsigned patternDepth, VectorizationStrategy *strategy) { assert(patternDepth > depthInPattern && @@ -716,23 +715,23 @@ static LogicalResult analyzeProfitability(ArrayRef matches, namespace { struct VectorizationState { - /// Adds an entry of pre/post vectorization instructions in the state. - void registerReplacement(Instruction *key, Instruction *value); + /// Adds an entry of pre/post vectorization operations in the state. + void registerReplacement(Operation *key, Operation *value); /// When the current vectorization pattern is successful, this erases the - /// instructions that were marked for erasure in the proper order and resets + /// operations that were marked for erasure in the proper order and resets /// the internal state for the next pattern. void finishVectorizationPattern(); - // In-order tracking of original Instruction that have been vectorized. + // In-order tracking of original Operation that have been vectorized. // Erase in reverse order. - SmallVector toErase; - // Set of Instruction that have been vectorized (the values in the + SmallVector toErase; + // Set of Operation that have been vectorized (the values in the // vectorizationMap for hashed access). The vectorizedSet is used in - // particular to filter the instructions that have already been vectorized by + // particular to filter the operations that have already been vectorized by // this pattern, when iterating over nested loops in this pattern. - DenseSet vectorizedSet; - // Map of old scalar Instruction to new vectorized Instruction. - DenseMap vectorizationMap; + DenseSet vectorizedSet; + // Map of old scalar Operation to new vectorized Operation. + DenseMap vectorizationMap; // Map of old scalar Value to new vectorized Value. DenseMap replacementMap; // The strategy drives which loop to vectorize by which amount. @@ -742,17 +741,16 @@ struct VectorizationState { // operations that have been vectorized. They can be retrieved from // `vectorizationMap` but it is convenient to keep track of them in a separate // data structure. - DenseSet roots; - // Terminal instructions for the worklist in the vectorizeNonTerminals + DenseSet roots; + // Terminal operations for the worklist in the vectorizeNonTerminals // function. They consist of the subset of store operations that have been // vectorized. They can be retrieved from `vectorizationMap` but it is // convenient to keep track of them in a separate data structure. Since they // do not necessarily belong to use-def chains starting from loads (e.g // storing a constant), we need to handle them in a post-pass. 
- DenseSet terminals; - // Checks that the type of `inst` is StoreOp and adds it to the terminals - // set. - void registerTerminal(Instruction *inst); + DenseSet terminals; + // Checks that the type of `op` is StoreOp and adds it to the terminals set. + void registerTerminal(Operation *op); private: void registerReplacement(Value *key, Value *value); @@ -760,8 +758,7 @@ private: } // end namespace -void VectorizationState::registerReplacement(Instruction *key, - Instruction *value) { +void VectorizationState::registerReplacement(Operation *key, Operation *value) { LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ commit vectorized op: "); LLVM_DEBUG(key->print(dbgs())); LLVM_DEBUG(dbgs() << " into "); @@ -780,19 +777,19 @@ void VectorizationState::registerReplacement(Instruction *key, } } -void VectorizationState::registerTerminal(Instruction *inst) { - assert(inst->isa() && "terminal must be a StoreOp"); - assert(terminals.count(inst) == 0 && +void VectorizationState::registerTerminal(Operation *op) { + assert(op->isa() && "terminal must be a StoreOp"); + assert(terminals.count(op) == 0 && "terminal was already inserted previously"); - terminals.insert(inst); + terminals.insert(op); } void VectorizationState::finishVectorizationPattern() { while (!toErase.empty()) { - auto *inst = toErase.pop_back_val(); + auto *op = toErase.pop_back_val(); LLVM_DEBUG(dbgs() << "\n[early-vect] finishVectorizationPattern erase: "); - LLVM_DEBUG(inst->print(dbgs())); - inst->erase(); + LLVM_DEBUG(op->print(dbgs())); + op->erase(); } } @@ -857,13 +854,13 @@ static LogicalResult vectorizeAffineForOp(AffineForOp loop, int64_t step, using namespace functional; loop.setStep(step); - FilterFunctionType notVectorizedThisPattern = [state](Instruction &inst) { - if (!matcher::isLoadOrStore(inst)) { + FilterFunctionType notVectorizedThisPattern = [state](Operation &op) { + if (!matcher::isLoadOrStore(op)) { return false; } - return state->vectorizationMap.count(&inst) == 0 && - state->vectorizedSet.count(&inst) == 0 && - state->roots.count(&inst) == 0 && state->terminals.count(&inst) == 0; + return state->vectorizationMap.count(&op) == 0 && + state->vectorizedSet.count(&op) == 0 && + state->roots.count(&op) == 0 && state->terminals.count(&op) == 0; }; auto loadAndStores = matcher::Op(notVectorizedThisPattern); SmallVector loadAndStoresMatches; @@ -891,8 +888,8 @@ static LogicalResult vectorizeAffineForOp(AffineForOp loop, int64_t step, /// we can build a cost model and a search procedure. static FilterFunctionType isVectorizableLoopPtrFactory(unsigned fastestVaryingMemRefDimension) { - return [fastestVaryingMemRefDimension](Instruction &forInst) { - auto loop = forInst.cast(); + return [fastestVaryingMemRefDimension](Operation &forOp) { + auto loop = forOp.cast(); return isVectorizableLoopAlongFastestVaryingMemRefDim( loop, fastestVaryingMemRefDimension); }; @@ -943,14 +940,13 @@ vectorizeLoopsAndLoadsRecursively(NestedMatch oneMatch, /// element type. /// If `type` is not a valid vector type or if the scalar constant is not a /// valid vector element type, returns nullptr. 
-static Value *vectorizeConstant(Instruction *inst, ConstantOp constant,
-                                Type type) {
+static Value *vectorizeConstant(Operation *op, ConstantOp constant, Type type) {
   if (!type || !type.isa<VectorType>() ||
       !VectorType::isValidElementType(constant.getType())) {
     return nullptr;
   }
-  FuncBuilder b(inst);
-  Location loc = inst->getLoc();
+  FuncBuilder b(op);
+  Location loc = op->getLoc();
   auto vectorType = type.cast<VectorType>();
   auto attr = SplatElementsAttr::get(vectorType, constant.getValue());
   auto *constantOpInst = constant.getOperation();
@@ -962,10 +958,10 @@ static Value *vectorizeConstant(Instruction *inst, ConstantOp constant,
   return b.createOperation(state)->getResult(0);
 }
 
-/// Tries to vectorize a given operand `op` of Instruction `inst` during
+/// Tries to vectorize a given `operand` of Operation `op` during
 /// def-chain propagation or during terminal vectorization, by applying the
 /// following logic:
-/// 1. if the defining instruction is part of the vectorizedSet (i.e. vectorized
+/// 1. if the defining operation is part of the vectorizedSet (i.e. vectorized
 ///    by use-def propagation), `op` is already in the proper vector form;
 /// 2. otherwise, the `op` may be in some other vector form that fails to
 ///    vectorize atm (i.e. broadcasting required), returns nullptr to indicate
@@ -983,7 +979,7 @@ static Value *vectorizeConstant(Instruction *inst, ConstantOp constant,
 /// vectorization is possible with the above logic. Returns nullptr otherwise.
 ///
 /// TODO(ntv): handle more complex cases.
-static Value *vectorizeOperand(Value *operand, Instruction *inst,
+static Value *vectorizeOperand(Value *operand, Operation *op,
                                VectorizationState *state) {
   LLVM_DEBUG(dbgs() << "\n[early-vect]vectorize operand: ");
   LLVM_DEBUG(operand->print(dbgs()));
@@ -1011,7 +1007,7 @@ static Value *vectorizeOperand(Value *operand, Instruction *inst,
   // 3. vectorize constant.
   if (auto constant = operand->getDefiningOp()->dyn_cast<ConstantOp>()) {
     return vectorizeConstant(
-        inst, constant,
+        op, constant,
         VectorType::get(state->strategy->vectorSizes, operand->getType()));
   }
   // 4. currently non-vectorizable.
@@ -1020,7 +1016,7 @@ static Value *vectorizeOperand(Value *operand, Instruction *inst,
   return nullptr;
 };
 
-/// Encodes Instruction-specific behavior for vectorization. In general we
+/// Encodes Operation-specific behavior for vectorization. In general we
 /// assume that all operands of an op must be vectorized but this is not always
 /// true. In the future, it would be nice to have a trait that describes how a
 /// particular operation vectorizes. For now we implement the case distinction
@@ -1029,8 +1025,8 @@ static Value *vectorizeOperand(Value *operand, Instruction *inst,
 /// TODO(ntv): consider adding a trait to Op to describe how it gets vectorized.
 /// Maybe some Ops are not vectorizable or require some tricky logic, we cannot
 /// do one-off logic here; ideally it would be TableGen'd.
-static Instruction *vectorizeOneInstruction(Instruction *opInst,
-                                            VectorizationState *state) {
+static Operation *vectorizeOneOperation(Operation *opInst,
+                                        VectorizationState *state) {
   // Sanity checks.
   assert(!opInst->isa<LoadOp>() &&
          "all loads must have already been fully vectorized independently");
@@ -1079,9 +1075,8 @@ static Instruction *vectorizeOneInstruction(Instruction *opInst,
   // Create a clone of the op with the proper operands and return types.
   // TODO(ntv): The following assumes there is always an op with a fixed
   //            name that works both in scalar mode and vector mode.
- // TODO(ntv): Is it worth considering an Instruction.clone operation - // which changes the type so we can promote an Instruction with less - // boilerplate? + // TODO(ntv): Is it worth considering an Operation.clone operation which + // changes the type so we can promote an Operation with less boilerplate? FuncBuilder b(opInst); OperationState newOp(b.getContext(), opInst->getLoc(), opInst->getName().getStringRef(), vectorOperands, @@ -1100,31 +1095,31 @@ static Instruction *vectorizeOneInstruction(Instruction *opInst, /// replacementMap. If any such replacement is missing, vectorization fails. static LogicalResult vectorizeNonTerminals(VectorizationState *state) { // 1. create initial worklist with the uses of the roots. - SetVector worklist; + SetVector worklist; // Note: state->roots have already been vectorized and must not be vectorized - // again. This fits `getForwardSlice` which does not insert `inst` in the + // again. This fits `getForwardSlice` which does not insert `op` in the // result. // Note: we have to exclude terminals because some of their defs may not be // nested under the vectorization pattern (e.g. constants defined in an // encompassing scope). // TODO(ntv): Use a backward slice for terminals, avoid special casing and // merge implementations. - for (auto *inst : state->roots) { - getForwardSlice(inst, &worklist, [state](Instruction *inst) { - return state->terminals.count(inst) == 0; // propagate if not terminal + for (auto *op : state->roots) { + getForwardSlice(op, &worklist, [state](Operation *op) { + return state->terminals.count(op) == 0; // propagate if not terminal }); } // We merged multiple slices, topological order may not hold anymore. worklist = topologicalSort(worklist); for (unsigned i = 0; i < worklist.size(); ++i) { - auto *inst = worklist[i]; + auto *op = worklist[i]; LLVM_DEBUG(dbgs() << "\n[early-vect] vectorize use: "); - LLVM_DEBUG(inst->print(dbgs())); + LLVM_DEBUG(op->print(dbgs())); - // Create vector form of the instruction. - // Insert it just before inst, on success register inst as replaced. - auto *vectorizedInst = vectorizeOneInstruction(inst, state); + // Create vector form of the operation. + // Insert it just before op, on success register op as replaced. + auto *vectorizedInst = vectorizeOneOperation(op, state); if (!vectorizedInst) { return failure(); } @@ -1133,7 +1128,7 @@ static LogicalResult vectorizeNonTerminals(VectorizationState *state) { // Note that we cannot just call replaceAllUsesWith because it may // result in ops with mixed types, for ops whose operands have not all // yet been vectorized. This would be invalid IR. - state->registerReplacement(inst, vectorizedInst); + state->registerReplacement(op, vectorizedInst); } return success(); } @@ -1193,9 +1188,8 @@ static LogicalResult vectorizeRootMatch(NestedMatch m, return guard.failure(); } - // 2. Vectorize operations reached by use-def chains from root - // except the terminals (store instructions) that need to be - // post-processed separately. + // 2. Vectorize operations reached by use-def chains from root except the + // terminals (store operations) that need to be post-processed separately. // TODO(ntv): add more as we expand. if (failed(vectorizeNonTerminals(&state))) { LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ failed vectorizeNonTerminals"); @@ -1208,8 +1202,8 @@ static LogicalResult vectorizeRootMatch(NestedMatch m, // encompassing scope). // TODO(ntv): Use a backward slice for terminals, avoid special casing and // merge implementations. 
- for (auto *inst : state.terminals) { - if (!vectorizeOneInstruction(inst, &state)) { // nullptr == failure + for (auto *op : state.terminals) { + if (!vectorizeOneOperation(op, &state)) { // nullptr == failure LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ failed to vectorize terminals"); return guard.failure(); }
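A note on the slicing utilities exercised by the vectorizer test pass and by vectorizeNonTerminals (getBackwardSlice, getForwardSlice, getSlice): conceptually they are transitive walks over use-def edges that do not include the starting operation in the result. The following is a minimal, self-contained sketch of that idea in plain C++; Node, operands, and users are hypothetical stand-ins for an operation and its use-def edges, not the MLIR types.

// Illustrative sketch only; Node and its edge lists are hypothetical.
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> operands; // producers of this node's inputs
  std::vector<Node *> users;    // consumers of this node's results
};

// Transitively collect producers (a "backward slice"), excluding `root`.
static void getBackwardSliceSketch(Node *root, std::set<Node *> &slice) {
  for (Node *def : root->operands)
    if (slice.insert(def).second)
      getBackwardSliceSketch(def, slice);
}

// Transitively collect consumers (a "forward slice"), excluding `root`.
static void getForwardSliceSketch(Node *root, std::set<Node *> &slice) {
  for (Node *use : root->users)
    if (slice.insert(use).second)
      getForwardSliceSketch(use, slice);
}

The "static slice" printed by testSlicing corresponds, in this picture, to taking both directions together from the matched node.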
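The operand-vectorization logic documented above reduces to a small case analysis over the vectorization state: reuse the operand when its producer was already vectorized by this pattern, give up when a broadcast would be required, substitute a recorded scalar-to-vector replacement when one exists, and splat constants to the target vector type. A rough standalone sketch of that decision procedure follows; Val, State, and the splatToVectorType helper are illustrative assumptions, and the exact ordering is not the pass's control flow.

// Hypothetical sketch of the case analysis; Val and State are stand-ins
// for the MLIR value and vectorization-state types.
#include <unordered_map>
#include <unordered_set>

struct Val {
  Val *definingOp = nullptr; // producer of this value, if any
  bool isConstant = false;
  bool hasVectorType = false;
};

struct State {
  std::unordered_set<Val *> vectorizedSet;         // producers vectorized by this pattern
  std::unordered_map<Val *, Val *> replacementMap; // scalar value -> vector value
};

// Returns the vector form of `operand`, or nullptr when none can be produced.
static Val *vectorizeOperandSketch(Val *operand, State &state,
                                   Val *(*splatToVectorType)(Val *)) {
  // 1. Producer already vectorized by this pattern: operand is already in
  //    the proper vector form.
  if (operand->definingOp && state.vectorizedSet.count(operand->definingOp))
    return operand;
  // 2. Operand is in some other vector form that would need a broadcast:
  //    not handled, signal failure.
  if (operand->hasVectorType)
    return nullptr;
  // 3. A recorded scalar -> vector replacement exists: substitute it.
  auto it = state.replacementMap.find(operand);
  if (it != state.replacementMap.end())
    return it->second;
  // 4. Scalar constants can be splatted to the target vector type;
  //    anything else is currently non-vectorizable.
  return operand->isConstant ? splatToVectorType(operand) : nullptr;
}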
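vectorizeNonTerminals seeds a worklist with the forward slices of the already-vectorized roots, re-establishes a topological order after merging those slices, and then rewrites each operation in that order so every use is visited after its defs. A small self-contained sketch of that shape, assuming a hypothetical DAG of Task nodes rather than MLIR operations and a plain Kahn-style sort in place of the topologicalSort utility:

// Hypothetical sketch: merge per-root slices, topologically sort, then
// process each node only after all of its in-set predecessors.
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Task {
  std::vector<Task *> users; // nodes that depend on this one
};

static std::vector<Task *>
topologicalSortSketch(const std::unordered_set<Task *> &merged) {
  std::unordered_map<Task *, int> pending; // unprocessed predecessors inside `merged`
  for (Task *t : merged)
    pending.emplace(t, 0);
  for (Task *t : merged)
    for (Task *u : t->users)
      if (pending.count(u))
        ++pending[u];

  std::queue<Task *> ready;
  for (auto &kv : pending)
    if (kv.second == 0)
      ready.push(kv.first);

  std::vector<Task *> order;
  while (!ready.empty()) {
    Task *t = ready.front();
    ready.pop();
    order.push_back(t);
    for (Task *u : t->users)
      if (pending.count(u) && --pending[u] == 0)
        ready.push(u);
  }
  return order; // every node appears after all of its in-set predecessors
}

Processing `order` front to back mirrors the property the pass relies on: by the time an operation is rewritten, the vectorized forms of its operands have already been registered in the state.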