forked from OSchip/llvm-project
[mlir][linalg][bufferize] Fix copy elision in `getResultBuffer`
A buffer copy must not be elided if the to-be-bufferized op itself reads the data, even when no following op reads it. Differential Revision: https://reviews.llvm.org/D116454
This commit is contained in:
parent
b2ed9f3f44
commit
b8d0753694
|
@ -405,8 +405,9 @@ mlir::linalg::comprehensive_bufferize::BufferizationState::getResultBuffer(
|
|||
if (auto bufferizableOp = options.dynCastBufferizableOp(lastWrite))
|
||||
if (!bufferizableOp.isMemoryWrite(lastWrite.cast<OpResult>(), *this))
|
||||
skipCopy = true;
|
||||
// Do not copy if the copied data is never read.
|
||||
if (!isValueRead(result))
|
||||
// Do not copy if the copied data is never read. (Neither by this op nor by
|
||||
// any following op.)
|
||||
if (!bufferizesToMemoryRead(*opOperand) && !isValueRead(result))
|
||||
skipCopy = true;
|
||||
// Do not copy if this op does not read the data, but writes it.
|
||||
if (bufferizesToMemoryWrite(*opOperand) &&
|
||||
|
|
|
@ -1189,3 +1189,39 @@ func @linalg_op_output_cannot_alias_with_input(
|
|||
return %r : tensor<?x?xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
#accesses = [
|
||||
affine_map<(i) -> (i)>
|
||||
]
|
||||
#trait = {
|
||||
indexing_maps = #accesses,
|
||||
iterator_types = ["parallel"]
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @op_is_reading_but_following_ops_are_not
|
||||
// CHECK-SAME: %[[t0:.*]]: memref<?xf32
|
||||
func @op_is_reading_but_following_ops_are_not(
|
||||
%t0 : tensor<?xf32> {linalg.inplaceable = false},
|
||||
%cst : f32)
|
||||
-> tensor<?xf32>
|
||||
{
|
||||
// Make sure that a copy is inserted here.
|
||||
// CHECK: %[[ALLOC:.*]] = memref.alloc
|
||||
// CHECK: linalg.copy(%[[t0]], %[[ALLOC]])
|
||||
// CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
|
||||
%r0 =linalg.generic #trait outs (%t0 : tensor<?xf32>) {
|
||||
^bb(%0: f32) :
|
||||
%a = arith.addf %cst, %0 : f32
|
||||
linalg.yield %a : f32
|
||||
} -> (tensor<?xf32>)
|
||||
|
||||
// CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
|
||||
%r1 = linalg.generic #trait outs (%r0 : tensor<?xf32>) {
|
||||
^bb(%0: f32) :
|
||||
linalg.yield %cst : f32
|
||||
} -> (tensor<?xf32>)
|
||||
|
||||
// CHECK: return %[[ALLOC]]
|
||||
return %r1 : tensor<?xf32>
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue