[mlir][memref] Improve canonicalization of memref.clone

The previous implementation did not handle casting behavior properly and
did not consider aliases.

Differential Revision: https://reviews.llvm.org/D102785
This commit is contained in:
Stephan Herhut 2021-05-21 16:13:02 +02:00
parent 117a247e8e
commit 90e55dfcf4
3 changed files with 107 additions and 27 deletions

View File

@ -498,32 +498,47 @@ struct SimplifyClones : public OpRewritePattern<CloneOp> {
Value source = cloneOp.input();
// Removes the clone operation and the corresponding dealloc and alloc
// operation (if any).
auto tryRemoveClone = [&](Operation *sourceOp, Operation *dealloc,
Operation *alloc) {
if (!sourceOp || !dealloc || !alloc ||
alloc->getBlock() != dealloc->getBlock())
return false;
rewriter.replaceOp(cloneOp, source);
rewriter.eraseOp(dealloc);
return true;
};
// This only finds dealloc operations for the immediate value. It should
// also consider aliases. That would also make the safety check below
// redundant.
Operation *cloneDeallocOp = findDealloc(cloneOp.output());
Operation *sourceDeallocOp = findDealloc(source);
// Removes unnecessary clones that are derived from the result of the clone
// op.
Operation *deallocOp = findDealloc(cloneOp.output());
Operation *sourceOp = source.getDefiningOp();
if (tryRemoveClone(sourceOp, deallocOp, sourceOp))
return success();
// If both are deallocated in the same block, their in-block lifetimes
// might not fully overlap, so we cannot decide which one to drop.
if (cloneDeallocOp && sourceDeallocOp &&
cloneDeallocOp->getBlock() == sourceDeallocOp->getBlock())
return failure();
// Removes unnecessary clones that are derived from the source of the clone
// op.
deallocOp = findDealloc(source);
if (tryRemoveClone(sourceOp, deallocOp, cloneOp))
return success();
Block *currentBlock = cloneOp->getBlock();
Operation *redundantDealloc = nullptr;
if (cloneDeallocOp && cloneDeallocOp->getBlock() == currentBlock) {
redundantDealloc = cloneDeallocOp;
} else if (sourceDeallocOp && sourceDeallocOp->getBlock() == currentBlock) {
redundantDealloc = sourceDeallocOp;
}
return failure();
if (!redundantDealloc)
return failure();
// Safety check that there are no other deallocations in between
// cloneOp and redundantDealloc, as otherwise we might deallocate an alias
// of source before the uses of the clone. With alias information, we could
// restrict this to only fail if the dealloc's operand is an alias
// of the source.
for (Operation *pos = cloneOp->getNextNode(); pos != redundantDealloc;
pos = pos->getNextNode()) {
auto effectInterface = dyn_cast<MemoryEffectOpInterface>(pos);
if (!effectInterface)
continue;
if (effectInterface.hasEffect<MemoryEffects::Free>())
return failure();
}
rewriter.replaceOpWithNewOp<memref::CastOp>(cloneOp, cloneOp.getType(),
source);
rewriter.eraseOp(redundantDealloc);
return success();
}
};

View File

@ -151,8 +151,10 @@ private:
}
// Recurse into all distinct regions and check for explicit control-flow
// loops.
for (Region &region : op->getRegions())
recurse(region.front(), current);
for (Region &region : op->getRegions()) {
if (!region.empty())
recurse(region.front(), current);
}
}
/// Recurses into explicit control-flow structures that are given by

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -canonicalize --split-input-file | FileCheck %s
// RUN: mlir-opt %s -canonicalize --split-input-file -allow-unregistered-dialect | FileCheck %s
// Test case: Basic folding of memref.tensor_load(memref.buffer_cast(t)) -> t
// CHECK-LABEL: func @tensor_load_of_buffer_cast(
@ -128,4 +128,67 @@ func @rank_reducing_subview_canonicalize(%arg0 : memref<?x?x?xf32>, %arg1 : inde
// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
// CHECK-SAME: : memref<?x?x?xf32> to memref<4x?xf32
// CHECK: %[[RESULT:.+]] = memref.cast %[[SUBVIEW]]
// CHEKC: return %[[RESULT]]
// CHECK: return %[[RESULT]]
// -----
// Test: a clone whose source is deallocated right after the clone folds
// away entirely — the clone's result is replaced by the source argument
// and the dealloc is erased (both checked by the single CHECK-NEXT below).
// CHECK-LABEL: @clone_before_dealloc
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
// CHECK-NEXT: return %[[ARG]]
%0 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
memref.dealloc %arg0 : memref<?xf32>
return %0 : memref<?xf32>
}
// -----
// Test: here the clone's RESULT (not its source) is used and then
// deallocated. The clone and the dealloc still fold away; the intermediate
// use is rewritten to operate on the source %arg0 directly.
// CHECK-LABEL: @clone_before_dealloc
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
// CHECK-NEXT: "use"(%arg0)
// CHECK-NEXT: return %[[ARG]]
%0 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
"use"(%0) : (memref<?xf32>) -> ()
memref.dealloc %0 : memref<?xf32>
return %arg0 : memref<?xf32>
}
// -----
// Test: a memref.cast feeding a clone is folded into the clone itself
// (clone goes directly from memref<?xf32> to memref<32xf32>); the cast
// disappears. No dealloc is present, so the clone op itself must remain.
// CHECK-LABEL: @clone_after_cast
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
func @clone_after_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
// CHECK-NEXT: memref.clone %[[ARG]] : memref<?xf32> to memref<32xf32>
// CHECK-NOT: memref.cast
%0 = memref.cast %arg0 : memref<?xf32> to memref<32xf32>
%1 = memref.clone %0 : memref<32xf32> to memref<32xf32>
return %1 : memref<32xf32>
}
// -----
// Test: a type-changing clone whose source is then deallocated is replaced
// by a memref.cast of the source (clone removed, dealloc erased) — the
// casting behavior this commit adds to the canonicalization.
// CHECK-LABEL: @clone_and_cast
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
func @clone_and_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
// CHECK-NEXT: %[[RES:.*]] = memref.cast %[[ARG]] : memref<?xf32> to memref<32xf32>
%0 = memref.clone %arg0 : memref<?xf32> to memref<32xf32>
// CHECK-NEXT: return %[[RES]]
memref.dealloc %arg0 : memref<?xf32>
return %0 : memref<32xf32>
}
// -----
// Negative test: %arg0 aliases the clone's source (via the cast) and is
// deallocated between the clone and the use of the clone's result. Folding
// the clone here would read freed memory, so the pattern must NOT fire —
// the clone and both deallocs are checked to survive canonicalization.
// CHECK-LABEL: @alias_is_freed
func @alias_is_freed(%arg0 : memref<?xf32>) {
// CHECK: memref.clone
// CHECK: memref.dealloc
// CHECK: memref.dealloc
%0 = memref.cast %arg0 : memref<?xf32> to memref<32xf32>
%1 = memref.clone %0 : memref<32xf32> to memref<32xf32>
memref.dealloc %arg0 : memref<?xf32>
"use"(%1) : (memref<32xf32>) -> ()
memref.dealloc %1 : memref<32xf32>
return
}