From 0f50414fa4553b1277684cb1dded84b334b35d51 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula
Date: Mon, 4 Feb 2019 13:48:44 -0800
Subject: [PATCH] Refactor common code getting memref access in
 getMemRefRegion - NFC

- use getAccessMap() instead of repeating it
- fold getMemRefRegion into MemRefRegion ctor (more natural; avoids heap
  allocation and unique_ptr where possible)
- change extractForInductionVars - MutableArrayRef -> ArrayRef for the
  arguments, since the method only returns copies of 'Value *': the client
  can't mutate the pointers themselves; mutating the 'Value's they point to
  is fine, but that doesn't mutate the pointers to them.
- change the way extractForInductionVars returns (see b/123437690)

PiperOrigin-RevId: 232359277
---
 mlir/include/mlir/AffineOps/AffineOps.h     |   8 +-
 mlir/include/mlir/Analysis/AffineAnalysis.h |   9 +-
 mlir/include/mlir/Analysis/Utils.h          |  48 ++++----
 mlir/lib/AffineOps/AffineOps.cpp            |   9 +-
 mlir/lib/Analysis/AffineAnalysis.cpp        |   3 +-
 mlir/lib/Analysis/Utils.cpp                 | 121 +++++++++-----------
 mlir/lib/Transforms/DmaGeneration.cpp       |   7 +-
 mlir/lib/Transforms/LoopFusion.cpp          |   7 +-
 mlir/lib/Transforms/LoopTiling.cpp          |   3 +-
 mlir/lib/Transforms/MemRefDataFlowOpt.cpp   |   5 +-
 10 files changed, 107 insertions(+), 113 deletions(-)

diff --git a/mlir/include/mlir/AffineOps/AffineOps.h b/mlir/include/mlir/AffineOps/AffineOps.h
index 46bb91c1bcab..0e71221871b2 100644
--- a/mlir/include/mlir/AffineOps/AffineOps.h
+++ b/mlir/include/mlir/AffineOps/AffineOps.h
@@ -201,10 +201,10 @@ bool isForInductionVar(const Value *val);
 OpPointer<AffineForOp> getForInductionVarOwner(Value *val);
 ConstOpPointer<AffineForOp> getForInductionVarOwner(const Value *val);
 
-/// Extracts the induction variables from a list of AffineForOps and returns
-/// them.
-SmallVector<Value *, 8>
-extractForInductionVars(MutableArrayRef<OpPointer<AffineForOp>> forInsts);
+/// Extracts the induction variables from a list of AffineForOps and places
+/// them in the output argument `ivs`.
+void extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
+                             SmallVectorImpl<Value *> *ivs);
 
 /// AffineBound represents a lower or upper bound in the for instruction.
 /// This class does not own the underlying operands. Instead, it refers
diff --git a/mlir/include/mlir/Analysis/AffineAnalysis.h b/mlir/include/mlir/Analysis/AffineAnalysis.h
index ca420bab7e17..dd3e676eb177 100644
--- a/mlir/include/mlir/Analysis/AffineAnalysis.h
+++ b/mlir/include/mlir/Analysis/AffineAnalysis.h
@@ -117,8 +117,8 @@ bool getIndexSet(llvm::MutableArrayRef<OpPointer<AffineForOp>> forOps,
 
 /// Encapsulates a memref load or store access information.
 struct MemRefAccess {
-  const Value *memref;
-  const Instruction *opInst;
+  Value *memref;
+  Instruction *opInst;
   llvm::SmallVector<Value *, 4> indices;
 
   /// Constructs a MemRefAccess from a load or store operation instruction.
   // return MemRefAccess, i.e., loadOp->getAccess(), dmaOp->getRead/WriteAccess.
   explicit MemRefAccess(Instruction *opInst);
 
+  // Returns the rank of the memref associated with this access.
+  unsigned getRank() const;
+  // Returns true if this access is of a store op.
+  bool isStore() const;
+
   /// Populates 'accessMap' with composition of AffineApplyOps reachable from
   // 'indices'.
   void getAccessMap(AffineValueMap *accessMap) const;
diff --git a/mlir/include/mlir/Analysis/Utils.h b/mlir/include/mlir/Analysis/Utils.h
index 54549fc8ef83..1dcbd759154f 100644
--- a/mlir/include/mlir/Analysis/Utils.h
+++ b/mlir/include/mlir/Analysis/Utils.h
@@ -76,8 +76,30 @@ unsigned getNestingDepth(const Instruction &stmt);
 //  The last field is a 2-d FlatAffineConstraints symbolic in %i.
 //
 struct MemRefRegion {
-  MemRefRegion(Value *memref, Location loc, bool write)
-      : memref(memref), write(write), loc(loc) {}
+  explicit MemRefRegion(Location loc) : loc(loc) {}
+
+  /// Computes the memory region accessed by this memref with the region
+  /// represented as constraints symbolic/parametric in 'loopDepth' loops
+  /// surrounding opInst. Returns false if this fails due to yet unimplemented
+  /// cases. The computed region's 'cst' field has exactly as many dimensional
+  /// identifiers as the rank of the memref, and *potentially* additional
+  /// symbolic identifiers which could include any of the loop IVs surrounding
+  /// opInst up until 'loopDepth', and any other Function symbols involved in
+  /// the access (e.g., those appearing in affine_apply's, loop bounds, etc.).
+  /// For example, the memref region for this operation at loopDepth = 1 will
+  /// be:
+  ///
+  ///    for %i = 0 to 32 {
+  ///      for %ii = %i to (d0) -> (d0 + 8) (%i) {
+  ///        load %A[%ii]
+  ///      }
+  ///    }
+  ///
+  ///   {memref = %A, write = false, {%i <= m0 <= %i + 7} }
+  /// The last field is a 2-d FlatAffineConstraints symbolic in %i.
+  ///
+  bool compute(Instruction *inst, unsigned loopDepth);
 
   FlatAffineConstraints *getConstraints() { return &cst; }
   const FlatAffineConstraints *getConstraints() const { return &cst; }
@@ -132,28 +154,6 @@ struct MemRefRegion {
   FlatAffineConstraints cst;
 };
 
-/// Computes the memory region accessed by this memref with the region
-/// represented as constraints symbolic/parameteric in 'loopDepth' loops
-/// surrounding opInst. Returns nullptr if this fails due to yet unimplemented
-/// cases. The computed region's 'cst' field has exactly as many dimensional
-/// identifiers as the rank of the memref, and *potentially* additional symbolic
-/// identifiers which could include any of the loop IVs surrounding opInst up
-/// until 'loopDepth' and another additional Function symbols involved with
-/// the access (for eg., those appear in affine_apply's, loop bounds, etc.).
-/// For example, the memref region for this operation at loopDepth = 1 will be:
-///
-///    for %i = 0 to 32 {
-///      for %ii = %i to (d0) -> (d0 + 8) (%i) {
-///        load %A[%ii]
-///      }
-///    }
-///
-///   {memref = %A, write = false, {%i <= m0 <= %i + 7} }
-/// The last field is a 2-d FlatAffineConstraints symbolic in %i.
-///
-std::unique_ptr<MemRefRegion> getMemRefRegion(Instruction *opInst,
-                                              unsigned loopDepth);
-
 /// Returns the size of memref data in bytes if it's statically shaped, None
 /// otherwise.
 Optional<uint64_t> getMemRefSizeInBytes(MemRefType memRefType);
diff --git a/mlir/lib/AffineOps/AffineOps.cpp b/mlir/lib/AffineOps/AffineOps.cpp
index 2e657cf7e17d..2ef96aa3d146 100644
--- a/mlir/lib/AffineOps/AffineOps.cpp
+++ b/mlir/lib/AffineOps/AffineOps.cpp
@@ -462,12 +462,11 @@ ConstOpPointer<AffineForOp> mlir::getForInductionVarOwner(const Value *val) {
 
 /// Extracts the induction variables from a list of AffineForOps and returns
 /// them.
-SmallVector<Value *, 8> mlir::extractForInductionVars(
-    MutableArrayRef<OpPointer<AffineForOp>> forInsts) {
-  SmallVector<Value *, 8> results;
+void mlir::extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
+                                   SmallVectorImpl<Value *> *ivs) {
+  ivs->reserve(forInsts.size());
   for (auto forInst : forInsts)
-    results.push_back(forInst->getInductionVar());
-  return results;
+    ivs->push_back(forInst->getInductionVar());
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp
index fafa51269390..0a2b283738cc 100644
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@@ -557,7 +557,8 @@ void mlir::getReachableAffineApplyOps(
 //       setExprStride(ArrayRef<int64_t> expr, int64_t stride)
 bool mlir::getIndexSet(MutableArrayRef<OpPointer<AffineForOp>> forOps,
                        FlatAffineConstraints *domain) {
-  auto indices = extractForInductionVars(forOps);
+  SmallVector<Value *, 4> indices;
+  extractForInductionVars(forOps, &indices);
   // Reset the domain with the associated Values in 'indices'.
   domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
   for (auto forOp : forOps) {
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index 652aaab0e1be..d27715333c8e 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -122,55 +122,36 @@ bool MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 //
 // TODO(bondhugula): extend this to any other memref dereferencing ops
 // (dma_start, dma_wait).
-std::unique_ptr<MemRefRegion> mlir::getMemRefRegion(Instruction *opInst,
-                                                    unsigned loopDepth) {
-  unsigned rank;
-  std::unique_ptr<MemRefRegion> region;
-  SmallVector<Value *, 4> indices;
-  if (auto loadOp = opInst->dyn_cast<LoadOp>()) {
-    rank = loadOp->getMemRefType().getRank();
-    indices.reserve(rank);
-    indices.append(loadOp->getIndices().begin(), loadOp->getIndices().end());
-    region = std::make_unique<MemRefRegion>(loadOp->getMemRef(),
-                                            loadOp->getLoc(), false);
-  } else if (auto storeOp = opInst->dyn_cast<StoreOp>()) {
-    rank = storeOp->getMemRefType().getRank();
-    indices.reserve(rank);
-    indices.append(storeOp->getIndices().begin(), storeOp->getIndices().end());
-    region = std::make_unique<MemRefRegion>(storeOp->getMemRef(),
-                                            storeOp->getLoc(), true);
-  } else {
-    assert(false && "expected load or store op");
-    return nullptr;
+bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
+  assert((inst->isa<LoadOp>() || inst->isa<StoreOp>()) &&
+         "load/store op expected");
+
+  MemRefAccess access(inst);
+  memref = access.memref;
+  write = access.isStore();
+
+  unsigned rank = access.getRank();
+
+  if (rank == 0) {
+    SmallVector<OpPointer<AffineForOp>, 4> ivs;
+    getLoopIVs(*inst, &ivs);
+    SmallVector<Value *, 8> regionSymbols;
+    extractForInductionVars(ivs, &regionSymbols);
+    // A rank 0 memref has a 0-d region.
+    cst.reset(rank, loopDepth, 0, regionSymbols);
+    return true;
   }
 
   // Build the constraints for this region.
-  FlatAffineConstraints *regionCst = region->getConstraints();
-
-  if (rank == 0) {
-    // A rank 0 memref has a 0-d region.
-    SmallVector<OpPointer<AffineForOp>, 4> ivs;
-    getLoopIVs(*opInst, &ivs);
-
-    SmallVector<Value *, 8> regionSymbols = extractForInductionVars(ivs);
-    regionCst->reset(0, loopDepth, 0, regionSymbols);
-    return region;
-  }
-
-  FuncBuilder b(opInst);
-  auto idMap = b.getMultiDimIdentityMap(rank);
-  // Initialize 'accessValueMap' and compose with reachable AffineApplyOps.
-  fullyComposeAffineMapAndOperands(&idMap, &indices);
-  // Remove any duplicates.
-  canonicalizeMapAndOperands(&idMap, &indices);
-  AffineValueMap accessValueMap(idMap, indices);
+  AffineValueMap accessValueMap;
+  access.getAccessMap(&accessValueMap);
   AffineMap accessMap = accessValueMap.getAffineMap();
 
   // We'll first associate the dims and symbols of the access map to the dims
-  // and symbols resp. of regionCst. This will change below once regionCst is
+  // and symbols resp. of cst. This will change below once cst is
   // fully constructed.
-  regionCst->reset(accessMap.getNumDims(), accessMap.getNumSymbols(), 0,
-                   accessValueMap.getOperands());
+  cst.reset(accessMap.getNumDims(), accessMap.getNumSymbols(), 0,
+            accessValueMap.getOperands());
 
   // Add equality constraints.
   unsigned numDims = accessMap.getNumDims();
@@ -178,65 +159,63 @@ std::unique_ptr<MemRefRegion> mlir::getMemRefRegion(Instruction *opInst,
   // Add inequalities for loop lower/upper bounds.
   for (unsigned i = 0; i < numDims + numSymbols; ++i) {
     if (auto loop = getForInductionVarOwner(accessValueMap.getOperand(i))) {
-      // Note that regionCst can now have more dimensions than accessMap if the
+      // Note that cst can now have more dimensions than accessMap if the
       // bounds expressions involve outer loops or other symbols.
       // TODO(bondhugula): rewrite this to use getInstIndexSet; this way
       // conditionals will be handled when the latter supports it.
-      if (!regionCst->addAffineForOpDomain(loop))
-        return nullptr;
+      if (!cst.addAffineForOpDomain(loop))
+        return false;
     } else {
       // Has to be a valid symbol.
       auto *symbol = accessValueMap.getOperand(i);
       assert(symbol->isValidSymbol());
       // Check if the symbol is a constant.
-      if (auto *opInst = symbol->getDefiningInst()) {
-        if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
-          regionCst->setIdToConstant(*symbol, constOp->getValue());
+      if (auto *inst = symbol->getDefiningInst()) {
+        if (auto constOp = inst->dyn_cast<ConstantIndexOp>()) {
+          cst.setIdToConstant(*symbol, constOp->getValue());
         }
       }
     }
   }
 
   // Add access function equalities to connect loop IVs to data dimensions.
-  if (!regionCst->composeMap(&accessValueMap)) {
+  if (!cst.composeMap(&accessValueMap)) {
     LLVM_DEBUG(llvm::dbgs() << "getMemRefRegion: compose affine map failed\n");
     LLVM_DEBUG(accessValueMap.getAffineMap().dump());
-    return nullptr;
+    return false;
   }
 
   // Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
   // this memref region is symbolic.
   SmallVector<OpPointer<AffineForOp>, 4> outerIVs;
-  getLoopIVs(*opInst, &outerIVs);
+  getLoopIVs(*inst, &outerIVs);
   assert(loopDepth <= outerIVs.size() && "invalid loop depth");
   outerIVs.resize(loopDepth);
   for (auto *operand : accessValueMap.getOperands()) {
     OpPointer<AffineForOp> iv;
     if ((iv = getForInductionVarOwner(operand)) &&
         llvm::is_contained(outerIVs, iv) == false) {
-      regionCst->projectOut(operand);
+      cst.projectOut(operand);
     }
   }
 
   // Project out any local variables (these would have been added for any
   // mod/divs).
-  regionCst->projectOut(regionCst->getNumDimAndSymbolIds(),
-                        regionCst->getNumLocalIds());
+  cst.projectOut(cst.getNumDimAndSymbolIds(), cst.getNumLocalIds());
 
   // Set all identifiers appearing after the first 'rank' identifiers as
   // symbolic identifiers - so that the ones corresponding to the memref
   // dimensions are the dimensional identifiers for the memref region.
-  regionCst->setDimSymbolSeparation(regionCst->getNumDimAndSymbolIds() - rank);
+  cst.setDimSymbolSeparation(cst.getNumDimAndSymbolIds() - rank);
 
   // Constant fold any symbolic identifiers.
-  regionCst->constantFoldIdRange(/*pos=*/regionCst->getNumDimIds(),
-                                 /*num=*/regionCst->getNumSymbolIds());
+  cst.constantFoldIdRange(/*pos=*/cst.getNumDimIds(),
+                          /*num=*/cst.getNumSymbolIds());
 
-  assert(regionCst->getNumDimIds() == rank && "unexpected MemRefRegion format");
+  assert(cst.getNumDimIds() == rank && "unexpected MemRefRegion format");
 
   LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
-  LLVM_DEBUG(region->getConstraints()->dump());
-
-  return region;
+  LLVM_DEBUG(cst.dump());
+  return true;
 }
 
 // TODO(mlir-team): improve/complete this when we have target data.
@@ -282,19 +261,19 @@ bool mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
   Instruction *opInst = loadOrStoreOp->getInstruction();
 
-  auto region = getMemRefRegion(opInst, /*loopDepth=*/0);
-  if (!region)
+  MemRefRegion region(opInst->getLoc());
+  if (!region.compute(opInst, /*loopDepth=*/0))
     return false;
 
   LLVM_DEBUG(llvm::dbgs() << "Memory region");
-  LLVM_DEBUG(region->getConstraints()->dump());
+  LLVM_DEBUG(region.getConstraints()->dump());
 
   bool outOfBounds = false;
   unsigned rank = loadOrStoreOp->getMemRefType().getRank();
 
   // For each dimension, check for out of bounds.
   for (unsigned r = 0; r < rank; r++) {
-    FlatAffineConstraints ucst(*region->getConstraints());
+    FlatAffineConstraints ucst(*region.getConstraints());
 
     // Intersect memory region with constraint capturing out of bounds (both out
     // of upper and out of lower), and check if the constraint system is
@@ -314,7 +293,7 @@ bool mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
     }
 
     // Check for a negative index.
-    FlatAffineConstraints lcst(*region->getConstraints());
+    FlatAffineConstraints lcst(*region.getConstraints());
     std::fill(ineq.begin(), ineq.end(), 0);
     // d_i <= -1;
     lcst.addConstantUpperBound(r, -1);
@@ -521,6 +500,12 @@ MemRefAccess::MemRefAccess(Instruction *loadOrStoreOpInst) {
   }
 }
 
+unsigned MemRefAccess::getRank() const {
+  return memref->getType().cast<MemRefType>().getRank();
+}
+
+bool MemRefAccess::isStore() const { return opInst->isa<StoreOp>(); }
+
 /// Returns the nesting depth of this statement, i.e., the number of loops
 /// surrounding this statement.
 unsigned mlir::getNestingDepth(const Instruction &inst) {
@@ -600,8 +585,8 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(const Block &block,
       // all regions for a given memref instead of creating one region per
       // memory op. This way we would be allocating O(num of memref's) sets
      // instead of O(num of load/store op's).
-      auto region = getMemRefRegion(opInst, 0);
-      if (!region) {
+      auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
+      if (!region->compute(opInst, /*loopDepth=*/0)) {
         LLVM_DEBUG(llvm::dbgs() << "Error obtaining memory region\n");
         // TODO: stop the walk if an error occurred.
         error = true;
diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp
index 92ae37670986..40d90c3e27ba 100644
--- a/mlir/lib/Transforms/DmaGeneration.cpp
+++ b/mlir/lib/Transforms/DmaGeneration.cpp
@@ -183,7 +183,8 @@ static bool getFullMemRefAsRegion(Instruction *opInst, unsigned numParamLoopIVs,
   SmallVector<OpPointer<AffineForOp>, 4> ivs;
   getLoopIVs(*opInst, &ivs);
   ivs.resize(numParamLoopIVs);
-  SmallVector<Value *, 8> symbols = extractForInductionVars(ivs);
+  SmallVector<Value *, 8> symbols;
+  extractForInductionVars(ivs, &symbols);
   regionCst->reset(rank, numParamLoopIVs, 0);
   regionCst->setIdValues(rank, rank + numParamLoopIVs, symbols);
 
@@ -576,8 +577,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
   }
 
   // Compute the MemRefRegion accessed.
-  auto region = getMemRefRegion(opInst, dmaDepth);
-  if (!region) {
+  auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
+  if (!region->compute(opInst, dmaDepth)) {
     LLVM_DEBUG(llvm::dbgs()
                << "Error obtaining memory region: semi-affine maps?\n");
     LLVM_DEBUG(llvm::dbgs() << "over-approximating to the entire memref\n");
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index d7d69e569e5f..7a002168528b 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -921,7 +921,8 @@ static Value *createPrivateMemRef(OpPointer<AffineForOp> forOp,
   unsigned rank = oldMemRefType.getRank();
 
   // Compute MemRefRegion for 'srcStoreOpInst' at depth 'dstLoopDepth'.
-  auto region = getMemRefRegion(srcStoreOpInst, dstLoopDepth);
+  MemRefRegion region(srcStoreOpInst->getLoc());
+  region.compute(srcStoreOpInst, dstLoopDepth);
   SmallVector<int64_t, 4> newShape;
   std::vector<SmallVector<int64_t, 4>> lbs;
   SmallVector<int64_t, 8> lbDivisors;
 
   // Query 'region' for 'newShape' and lower bounds of MemRefRegion accessed
   // by 'srcStoreOpInst' at depth 'dstLoopDepth'.
   Optional<int64_t> numElements =
-      region->getConstantBoundingSizeAndShape(&newShape, &lbs, &lbDivisors);
+      region.getConstantBoundingSizeAndShape(&newShape, &lbs, &lbDivisors);
   assert(numElements.hasValue() &&
          "non-constant number of elts in local buffer");
 
-  const FlatAffineConstraints *cst = region->getConstraints();
+  const FlatAffineConstraints *cst = region.getConstraints();
   // 'outerIVs' holds the values that this memory region is symbolic/parametric
   // on; this would correspond to loop IVs surrounding the level at which the
   // slice is being materialized.
diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp
index 8b368e5f182a..758d434d25e0 100644
--- a/mlir/lib/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Transforms/LoopTiling.cpp
@@ -201,7 +201,8 @@ UtilResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,
   // Move the loop body of the original nest to the new one.
   moveLoopBody(origLoops[origLoops.size() - 1], innermostPointLoop);
 
-  SmallVector<Value *, 8> origLoopIVs = extractForInductionVars(band);
+  SmallVector<Value *, 8> origLoopIVs;
+  extractForInductionVars(band, &origLoopIVs);
   SmallVector<Optional<Value *>, 6> ids(origLoopIVs.begin(), origLoopIVs.end());
   FlatAffineConstraints cst;
   getIndexSet(band, &cst);
diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
index b2b69dc7b6d7..2d06a3273151 100644
--- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
+++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp
@@ -178,8 +178,9 @@ void MemRefDataFlowOpt::visitInstruction(Instruction *opInst) {
       // is trivially loading from a single location at that depth; so there
      // isn't a need to call isRangeOneToOne.
      if (getNestingDepth(*storeOpInst) < loadOpDepth) {
-        auto region = getMemRefRegion(loadOpInst, nsLoops);
-        if (!region->getConstraints()->isRangeOneToOne(
+        MemRefRegion region(loadOpInst->getLoc());
+        region.compute(loadOpInst, nsLoops);
+        if (!region.getConstraints()->isRangeOneToOne(
                 /*start=*/0, /*limit=*/loadOp->getMemRefType().getRank()))
          break;
      }
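
For readers skimming the API change, here is a minimal sketch of how call
sites migrate, distilled from the hunks above (illustrative only; 'opInst',
'dmaDepth', and 'forOps' stand in for whatever instruction, loop depth, and
loop list a caller already has in hand):

  // Before: a free function returned a heap-allocated region.
  //   auto region = getMemRefRegion(opInst, dmaDepth);  // unique_ptr
  //   if (!region) { /* unimplemented case */ }

  // After: a stack-allocated region is populated in place; compute()
  // returns false for yet-unimplemented cases (e.g., semi-affine maps),
  // and callers fall back as before (DmaGeneration over-approximates to
  // the entire memref).
  MemRefRegion region(opInst->getLoc());
  if (!region.compute(opInst, dmaDepth)) {
    // Handle the unimplemented case.
  }

  // Likewise, extractForInductionVars now fills an output vector instead
  // of returning one by value:
  SmallVector<Value *, 8> ivs;
  extractForInductionVars(forOps, &ivs);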