forked from OSchip/llvm-project
[MLIR] NFC. affine data copy generate utility return value cleanup
Clean up return value on affineDataCopyGenerate utility. Return the actual success/failure status instead of the "number of bytes" which isn't being used in the codebase in any way. The success/failure status wasn't being sent out earlier. Differential Revision: https://reviews.llvm.org/D117209
This commit is contained in:
parent
8de0c1feca
commit
05f6e93938
|
@ -187,25 +187,26 @@ struct AffineCopyOptions {
|
|||
/// Performs explicit copying for the contiguous sequence of operations in the
|
||||
/// block iterator range [`begin', `end'), where `end' can't be past the
|
||||
/// terminator of the block (since additional operations are potentially
|
||||
/// inserted right before `end`. Returns the total size of fast memory space
|
||||
/// buffers used. `copyOptions` provides various parameters, and the output
|
||||
/// argument `copyNests` is the set of all copy nests inserted, each represented
|
||||
/// by its root affine.for. Since we generate alloc's and dealloc's for all fast
|
||||
/// buffers (before and after the range of operations resp. or at a hoisted
|
||||
/// position), all of the fast memory capacity is assumed to be available for
|
||||
/// processing this block range. When 'filterMemRef' is specified, copies are
|
||||
/// only generated for the provided MemRef.
|
||||
uint64_t affineDataCopyGenerate(Block::iterator begin, Block::iterator end,
|
||||
const AffineCopyOptions ©Options,
|
||||
Optional<Value> filterMemRef,
|
||||
DenseSet<Operation *> ©Nests);
|
||||
/// inserted right before `end`. `copyOptions` provides various parameters, and
|
||||
/// the output argument `copyNests` is the set of all copy nests inserted, each
|
||||
/// represented by its root affine.for. Since we generate alloc's and dealloc's
|
||||
/// for all fast buffers (before and after the range of operations resp. or at a
|
||||
/// hoisted position), all of the fast memory capacity is assumed to be
|
||||
/// available for processing this block range. When 'filterMemRef' is specified,
|
||||
/// copies are only generated for the provided MemRef. Returns success if the
|
||||
/// explicit copying succeeded for all memrefs on which affine load/stores were
|
||||
/// encountered.
|
||||
LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end,
|
||||
const AffineCopyOptions ©Options,
|
||||
Optional<Value> filterMemRef,
|
||||
DenseSet<Operation *> ©Nests);
|
||||
|
||||
/// A convenience version of affineDataCopyGenerate for all ops in the body of
|
||||
/// an AffineForOp.
|
||||
uint64_t affineDataCopyGenerate(AffineForOp forOp,
|
||||
const AffineCopyOptions ©Options,
|
||||
Optional<Value> filterMemRef,
|
||||
DenseSet<Operation *> ©Nests);
|
||||
LogicalResult affineDataCopyGenerate(AffineForOp forOp,
|
||||
const AffineCopyOptions ©Options,
|
||||
Optional<Value> filterMemRef,
|
||||
DenseSet<Operation *> ©Nests);
|
||||
|
||||
/// Result for calling generateCopyForMemRegion.
|
||||
struct CopyGenerateResult {
|
||||
|
|
|
@ -66,7 +66,7 @@ struct AffineDataCopyGeneration
|
|||
}
|
||||
|
||||
void runOnFunction() override;
|
||||
LogicalResult runOnBlock(Block *block, DenseSet<Operation *> ©Nests);
|
||||
void runOnBlock(Block *block, DenseSet<Operation *> ©Nests);
|
||||
|
||||
// Constant zero index to avoid too many duplicates.
|
||||
Value zeroIndex = nullptr;
|
||||
|
@ -94,11 +94,10 @@ mlir::createAffineDataCopyGenerationPass() {
|
|||
/// ranges: each range is either a sequence of one or more operations starting
|
||||
/// and ending with an affine load or store op, or just an affine.forop (which
|
||||
/// could have other affine for op's nested within).
|
||||
LogicalResult
|
||||
AffineDataCopyGeneration::runOnBlock(Block *block,
|
||||
DenseSet<Operation *> ©Nests) {
|
||||
void AffineDataCopyGeneration::runOnBlock(Block *block,
|
||||
DenseSet<Operation *> ©Nests) {
|
||||
if (block->empty())
|
||||
return success();
|
||||
return;
|
||||
|
||||
uint64_t fastMemCapacityBytes =
|
||||
fastMemoryCapacity != std::numeric_limits<uint64_t>::max()
|
||||
|
@ -108,7 +107,7 @@ AffineDataCopyGeneration::runOnBlock(Block *block,
|
|||
fastMemorySpace, tagMemorySpace,
|
||||
fastMemCapacityBytes};
|
||||
|
||||
// Every affine.forop in the block starts and ends a block range for copying;
|
||||
// Every affine.for op in the block starts and ends a block range for copying;
|
||||
// in addition, a contiguous sequence of operations starting with a
|
||||
// load/store op but not including any copy nests themselves is also
|
||||
// identified as a copy block range. Straightline code (a contiguous chunk of
|
||||
|
@ -133,8 +132,8 @@ AffineDataCopyGeneration::runOnBlock(Block *block,
|
|||
// If you hit a non-copy for loop, we will split there.
|
||||
if ((forOp = dyn_cast<AffineForOp>(&*it)) && copyNests.count(forOp) == 0) {
|
||||
// Perform the copying up unti this 'for' op first.
|
||||
affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/it, copyOptions,
|
||||
/*filterMemRef=*/llvm::None, copyNests);
|
||||
(void)affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/it, copyOptions,
|
||||
/*filterMemRef=*/llvm::None, copyNests);
|
||||
|
||||
// Returns true if the footprint is known to exceed capacity.
|
||||
auto exceedsCapacity = [&](AffineForOp forOp) {
|
||||
|
@ -157,7 +156,7 @@ AffineDataCopyGeneration::runOnBlock(Block *block,
|
|||
if (recurseInner) {
|
||||
// We'll recurse and do the copies at an inner level for 'forInst'.
|
||||
// Recurse onto the body of this loop.
|
||||
(void)runOnBlock(forOp.getBody(), copyNests);
|
||||
runOnBlock(forOp.getBody(), copyNests);
|
||||
} else {
|
||||
// We have enough capacity, i.e., copies will be computed for the
|
||||
// portion of the block until 'it', and for 'it', which is 'forOp'. Note
|
||||
|
@ -167,8 +166,9 @@ AffineDataCopyGeneration::runOnBlock(Block *block,
|
|||
// Inner loop copies have their own scope - we don't thus update
|
||||
// consumed capacity. The footprint check above guarantees this inner
|
||||
// loop's footprint fits.
|
||||
affineDataCopyGenerate(/*begin=*/it, /*end=*/std::next(it), copyOptions,
|
||||
/*filterMemRef=*/llvm::None, copyNests);
|
||||
(void)affineDataCopyGenerate(/*begin=*/it, /*end=*/std::next(it),
|
||||
copyOptions,
|
||||
/*filterMemRef=*/llvm::None, copyNests);
|
||||
}
|
||||
// Get to the next load or store op after 'forOp'.
|
||||
curBegin = std::find_if(std::next(it), block->end(), [&](Operation &op) {
|
||||
|
@ -190,11 +190,10 @@ AffineDataCopyGeneration::runOnBlock(Block *block,
|
|||
assert(!curBegin->hasTrait<OpTrait::IsTerminator>() &&
|
||||
"can't be a terminator");
|
||||
// Exclude the affine.yield - hence, the std::prev.
|
||||
affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/std::prev(block->end()),
|
||||
copyOptions, /*filterMemRef=*/llvm::None, copyNests);
|
||||
(void)affineDataCopyGenerate(/*begin=*/curBegin,
|
||||
/*end=*/std::prev(block->end()), copyOptions,
|
||||
/*filterMemRef=*/llvm::None, copyNests);
|
||||
}
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
void AffineDataCopyGeneration::runOnFunction() {
|
||||
|
@ -210,7 +209,7 @@ void AffineDataCopyGeneration::runOnFunction() {
|
|||
copyNests.clear();
|
||||
|
||||
for (auto &block : f)
|
||||
(void)runOnBlock(&block, copyNests);
|
||||
runOnBlock(&block, copyNests);
|
||||
|
||||
// Promote any single iteration loops in the copy nests and collect
|
||||
// load/stores to simplify.
|
||||
|
|
|
@ -2970,24 +2970,13 @@ static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Performs explicit copying for the contiguous sequence of operations in the
|
||||
/// block iterator range [`begin', `end'), where `end' can't be past the
|
||||
/// terminator of the block (since additional operations are potentially
|
||||
/// inserted right before `end`. Returns the total size of fast memory space
|
||||
/// buffers used. `copyOptions` provides various parameters, and the output
|
||||
/// argument `copyNests` is the set of all copy nests inserted, each represented
|
||||
/// by its root affine.for. Since we generate alloc's and dealloc's for all fast
|
||||
/// buffers (before and after the range of operations resp. or at a hoisted
|
||||
/// position), all of the fast memory capacity is assumed to be available for
|
||||
/// processing this block range. When 'filterMemRef' is specified, copies are
|
||||
/// only generated for the provided MemRef.
|
||||
uint64_t mlir::affineDataCopyGenerate(Block::iterator begin,
|
||||
Block::iterator end,
|
||||
const AffineCopyOptions ©Options,
|
||||
Optional<Value> filterMemRef,
|
||||
DenseSet<Operation *> ©Nests) {
|
||||
LogicalResult mlir::affineDataCopyGenerate(Block::iterator begin,
|
||||
Block::iterator end,
|
||||
const AffineCopyOptions ©Options,
|
||||
Optional<Value> filterMemRef,
|
||||
DenseSet<Operation *> ©Nests) {
|
||||
if (begin == end)
|
||||
return 0;
|
||||
return success();
|
||||
|
||||
assert(begin->getBlock() == std::prev(end)->getBlock() &&
|
||||
"Inconsistent block begin/end args");
|
||||
|
@ -3109,9 +3098,9 @@ uint64_t mlir::affineDataCopyGenerate(Block::iterator begin,
|
|||
});
|
||||
|
||||
if (error) {
|
||||
begin->emitError(
|
||||
"copy generation failed for one or more memref's in this block\n");
|
||||
return 0;
|
||||
LLVM_DEBUG(begin->emitError(
|
||||
"copy generation failed for one or more memref's in this block\n"));
|
||||
return failure();
|
||||
}
|
||||
|
||||
uint64_t totalCopyBuffersSizeInBytes = 0;
|
||||
|
@ -3147,18 +3136,18 @@ uint64_t mlir::affineDataCopyGenerate(Block::iterator begin,
|
|||
processRegions(writeRegions);
|
||||
|
||||
if (!ret) {
|
||||
begin->emitError(
|
||||
"copy generation failed for one or more memref's in this block\n");
|
||||
return totalCopyBuffersSizeInBytes;
|
||||
LLVM_DEBUG(begin->emitError(
|
||||
"copy generation failed for one or more memref's in this block\n"));
|
||||
return failure();
|
||||
}
|
||||
|
||||
// For a range of operations, a note will be emitted at the caller.
|
||||
AffineForOp forOp;
|
||||
uint64_t sizeInKib = llvm::divideCeil(totalCopyBuffersSizeInBytes, 1024);
|
||||
if (llvm::DebugFlag && (forOp = dyn_cast<AffineForOp>(&*begin))) {
|
||||
forOp.emitRemark()
|
||||
<< sizeInKib
|
||||
<< " KiB of copy buffers in fast memory space for this block\n";
|
||||
LLVM_DEBUG(forOp.emitRemark()
|
||||
<< sizeInKib
|
||||
<< " KiB of copy buffers in fast memory space for this block\n");
|
||||
}
|
||||
|
||||
if (totalCopyBuffersSizeInBytes > copyOptions.fastMemCapacityBytes) {
|
||||
|
@ -3167,15 +3156,15 @@ uint64_t mlir::affineDataCopyGenerate(Block::iterator begin,
|
|||
block->getParentOp()->emitWarning(str);
|
||||
}
|
||||
|
||||
return totalCopyBuffersSizeInBytes;
|
||||
return success();
|
||||
}
|
||||
|
||||
// A convenience version of affineDataCopyGenerate for all ops in the body of
|
||||
// an AffineForOp.
|
||||
uint64_t mlir::affineDataCopyGenerate(AffineForOp forOp,
|
||||
const AffineCopyOptions ©Options,
|
||||
Optional<Value> filterMemRef,
|
||||
DenseSet<Operation *> ©Nests) {
|
||||
LogicalResult mlir::affineDataCopyGenerate(AffineForOp forOp,
|
||||
const AffineCopyOptions ©Options,
|
||||
Optional<Value> filterMemRef,
|
||||
DenseSet<Operation *> ©Nests) {
|
||||
return affineDataCopyGenerate(forOp.getBody()->begin(),
|
||||
std::prev(forOp.getBody()->end()), copyOptions,
|
||||
filterMemRef, copyNests);
|
||||
|
|
|
@ -277,7 +277,6 @@ func @dma_unknown_size(%arg0: memref<?x?xf32>) {
|
|||
// size -- not yet implemented.
|
||||
// CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<?x?xf32>
|
||||
affine.load %arg0[%i, %j] : memref<? x ? x f32>
|
||||
// expected-error@-6 {{copy generation failed for one or more memref's in this block}}
|
||||
}
|
||||
}
|
||||
return
|
||||
|
@ -297,7 +296,6 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
|
|||
// not yet implemented.
|
||||
// CHECK: affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref<1024x1024x1024xf32>
|
||||
%v = affine.load %arg0[%idx, %idy, %idz] : memref<1024 x 1024 x 1024 x f32>
|
||||
// expected-error@-10 {{copy generation failed for one or more memref's in this block}}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -90,12 +90,16 @@ void TestAffineDataCopy::runOnFunction() {
|
|||
/*fastMemCapacityBytes=*/32 * 1024 * 1024UL};
|
||||
DenseSet<Operation *> copyNests;
|
||||
if (clMemRefFilter) {
|
||||
affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(), copyNests);
|
||||
if (failed(affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(),
|
||||
copyNests)))
|
||||
return;
|
||||
} else if (clTestGenerateCopyForMemRegion) {
|
||||
CopyGenerateResult result;
|
||||
MemRefRegion region(loopNest.getLoc());
|
||||
(void)region.compute(load, /*loopDepth=*/0);
|
||||
(void)generateCopyForMemRegion(region, loopNest, copyOptions, result);
|
||||
if (failed(region.compute(load, /*loopDepth=*/0)))
|
||||
return;
|
||||
if (failed(generateCopyForMemRegion(region, loopNest, copyOptions, result)))
|
||||
return;
|
||||
}
|
||||
|
||||
// Promote any single iteration loops in the copy nests and simplify
|
||||
|
|
Loading…
Reference in New Issue