[mlir][Linalg] Revisit RAW dependence interference in comprehensive bufferize.

Previously, comprehensive bufferize would consider all aliasing reads and writes to
the result buffer and its matching operand. This introduced spurious dependences
and, in turn, too many unnecessary copies.

Instead, this revision revisits how the read and write alias sets are gathered,
which results in fewer allocs and copies.
An exhaustive set of test cases is added that covers all possible permutations of
`matmul(extract_slice(fill), extract_slice(fill), ...)`.
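
For reference, the analysis is queried per tensor result: when a read-after-write
interference is detected the result is forced out of place (alloc + copy), otherwise
it remains a candidate to reuse its operand's buffer. Below is a minimal sketch of
that query using only the `BufferizationAliasInfo` entry points visible in this diff;
the helper name `decideInPlaceness` and the surrounding loop are illustrative, not
the actual pass driver.

```cpp
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Operation.h"

using namespace mlir;

// Sketch only: BufferizationAliasInfo is internal to the comprehensive
// bufferization pass; this assumes access to it and to the entry points
// declared in this commit.
static void decideInPlaceness(Operation *op, BufferizationAliasInfo &aliasInfo,
                              const DominanceInfo &domInfo) {
  for (OpResult result : op->getResults()) {
    // If bufferizing `result` in place would expose a write to a read of an
    // aliasing value, fall back to an out-of-place buffer (alloc + copy).
    if (aliasInfo.wouldCreateReadAfterWriteInterference(result, domInfo))
      aliasInfo.bufferizeOutOfPlace(result);
    // In test-analysis-only mode this decision surfaces as
    // __inplace_results_attr__ = ["false"]; otherwise the result may
    // bufferize in place with its aliasing operand.
  }
}
```

The new test file below exercises exactly this decision on every ordering of the two
fills and the two slice extractions.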
Nicolas Vasilache 2021-09-20 14:03:55 +00:00
parent c8eed8f9a7
commit 0d2c54e851
3 changed files with 784 additions and 86 deletions


@@ -743,16 +743,39 @@ public:
/// Set the inPlace bufferization spec to false.
void bufferizeOutOfPlace(OpResult result);
/// Return true if it is possible to find an inplace write W among the uses of
/// aliasInfo[result], and a read R among the uses of aliasInfo[result],
/// such that W and R interfere.
/// Return true if it is possible to find an inplace write W among `usesWrite`
/// and a read R among `usesRead`, such that W and R interfere.
/// Such a (W, R) pair is an interference to the inplace bufferization of
/// rootWrite when:
/// opResult when:
/// 1. R is not known to properly dominate W (i.e. the effects of the write may
/// be visible from R).
/// 2. one cannot find an intermediate clobbering write `C` to W, such that
/// C interleaved between W and R (i.e. W -> C -> R where -> denotes
/// dominance).
bool wouldCreateReadAfterWriteInterference(
Operation *opToBufferize, DenseSet<OpOperand *> &usesRead,
DenseSet<OpOperand *> &usesWrite, const DominanceInfo &domInfo) const;
/// Assume that result bufferizes in-place with one of the operation's
/// operands. Return true if it is possible to find an inplace write W (resp.
/// a read R) among the uses of `aliasInfo[result]`, and a read R (resp. an
/// inplace write W) among the uses of
/// `aliasInfo[getAliasingOpOperand(result)]`, such that W and R interfere.
/// Interference detection is needed to determine which cases may bufferize
/// inplace without interferences. Such cases comprise:
///
/// ```
/// %0 = op_to_bufferize(%1)
/// read(%1)
///
/// %0 = op_to_bufferize(%1)
/// write(%0)
/// read(%1)
///
/// %0 = op_to_bufferize(%1)
/// write(%1)
/// read(%0)
/// ```
bool
wouldCreateReadAfterWriteInterference(OpResult result,
const DominanceInfo &domInfo) const;
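
The two conditions spelled out above reduce, for each (W, R) pair drawn from
`usesWrite` x `usesRead`, to a dominance query followed by the clobbering check.
Here is a compact sketch of that per-pair core; it is an assumed paraphrase of the
loop that appears further down in this diff, with names and the exact shape of the
clobbering call being illustrative rather than verbatim upstream code.

```cpp
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Operation.h"
#include "llvm/ADT/DenseSet.h"

using namespace mlir;

// In the real code this is a private member of BufferizationAliasInfo; a free
// declaration stands in for it here so the sketch is self-contained.
bool isClobberedWriteBeforeRead(Operation *opToBufferize, OpOperand &read,
                                OpOperand &write, const DominanceInfo &domInfo);

// Assumed paraphrase of the set-based overload's body.
bool hasRaWInterference(Operation *opToBufferize,
                        const llvm::DenseSet<OpOperand *> &usesRead,
                        const llvm::DenseSet<OpOperand *> &usesWrite,
                        const DominanceInfo &domInfo) {
  for (OpOperand *uRead : usesRead) {
    Operation *aliasingReadOp = uRead->getOwner();
    for (OpOperand *uWrite : usesWrite) {
      Operation *aliasingWriteOp = uWrite->getOwner();
      // 1. A read that properly dominates the write cannot observe its
      //    effects: this (W, R) pair is not an interference.
      if (domInfo.properlyDominates(aliasingReadOp, aliasingWriteOp))
        continue;
      // 2. If an intermediate write provably clobbers W before R executes
      //    (W -> C -> R in dominance order), the pair is harmless as well.
      if (isClobberedWriteBeforeRead(opToBufferize, *uRead, *uWrite, domInfo))
        continue;
      // Otherwise an inplace bufferization would create a RaW hazard.
      return true;
    }
  }
  return false;
}
```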
@@ -828,29 +851,29 @@ private:
///
/// Case discussion:
/// ================
/// Case 1: rootRead is produced by opToBufferize,
/// Case 2: rootWrite is produced by opToBufferize,
/// Case 1: opOperand is produced by opToBufferize,
/// Case 2: opResult is produced by opToBufferize,
/// Common case:
/// - aliasingReadOp is a read to an alias of rootRead.
/// - aliasingWriteOp is an inplace write to an alias of rootWrite.
/// - aliasingReadOp is a read to an alias of opOperand.
/// - aliasingWriteOp is an inplace write to an alias of opResult.
/// - aliasingWriteOp dominates aliasingReadOp.
///
/// ```
/// // Either case 1:
/// %rootRead = opToBufferize(%rootWrite)
/// aliasingWriteOp(%aliasingWrite = alias(%rootWrite)) // inplace
/// aliasingReadOp( %aliasingRead = alias(%rootRead))
/// %opOperand = opToBufferize(%opResult)
/// aliasingWriteOp(%aliasingWrite = alias(%opResult)) // inplace
/// aliasingReadOp( %aliasingRead = alias(%opOperand))
/// ```
///
/// ```
/// // Or case 2:
/// %rootWrite = opToBufferize(%rootRead)
/// aliasingWriteOp(%aliasingWrite = alias(%rootWrite)) // inplace
/// aliasingReadOp( %aliasingRead = alias(%rootRead))
/// %opResult = opToBufferize(%opOperand)
/// aliasingWriteOp(%aliasingWrite = alias(%opResult)) // inplace
/// aliasingReadOp( %aliasingRead = alias(%opOperand))
/// ```
///
/// Capture possible cases where `aliasingWriteOp(alias(%rootWrite))` has no
/// visible effect on `aliasingReadOp(alias(%rootRead))`.
/// Capture possible cases where `aliasingWriteOp(alias(%opResult))` has no
/// visible effect on `aliasingReadOp(alias(%opOperand))`.
bool isClobberedWriteBeforeRead(Operation *opToBufferize,
OpOperand &aliasingRead,
OpOperand &aliasingWrite,
@@ -969,71 +992,11 @@ void BufferizationAliasInfo::bufferizeOutOfPlace(OpResult result) {
setInPlaceOpResult(result, InPlaceSpec::False);
}
/// Return true if it is possible to find an inplace write W among the uses of
/// aliasInfo[result], and a read R among the uses of aliasInfo[result],
/// such that W and R interfere.
/// Such a (W, R) pair is an interference to the inplace bufferization of
/// rootWrite when:
/// 1. R is not known to properly dominate W (i.e. the effects of the write
/// may be visible from R).
/// 2. one cannot find an intermediate clobbering write `C` to W, such that
/// C interleaved between W and R (i.e. W -> C -> R where -> denotes
/// dominance).
/// Return true if it is possible to find an inplace write W among `usesWrite`
/// and a read R among `usesRead`, such that W and R interfere.
bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference(
OpResult result, const DominanceInfo &domInfo) const {
Optional<OpOperand *> maybeAliasingOperand = getAliasingOpOperand(result);
if (!maybeAliasingOperand)
return false;
Operation *opToBufferize = result.getDefiningOp();
Value rootWrite = result;
Value rootRead = (*maybeAliasingOperand)->get();
LDBG("----Start wouldCreateReadAfterWriteInterference\n");
LDBG("--------consider all aliases to root read: " << printValueInfo(rootRead)
<< "\n");
LDBG("--------consider all aliases to root write: "
<< printValueInfo(rootWrite) << "\n");
// If `result` were to be bufferized in place, all the aliases of `rootRead`
// and `rootWrite` would immediately alias with each other and could create
// RaW hazards.
// Therefore, for each alias of either `rootRead` or `rootWrite`, we collect:
// 1. all of the reads of any alias.
// 2. all the write uses of any alias that are already known to bufferize
// inplace.
// 3. all the write uses of any alias that belong to `opToBufferize`: as if
// `opToBufferize` were bufferized inplace.
DenseSet<OpOperand *> usesRead, usesWrite;
for (Value v : {rootRead, rootWrite}) {
for (Value alias : getAliases(v)) {
for (auto &use : alias.getUses()) {
// Read to a value that aliases v.
if (bufferizesToMemoryRead(use)) {
LDBG("------------bufferizesToMemoryRead: "
<< use.getOwner()->getName().getStringRef() << "\n");
usesRead.insert(&use);
}
// Inplace write to a value that aliases v.
if (bufferizesToMemoryWrite(use, InPlaceSpec::True)) {
LDBG("------------bufferizesToMemoryWrite: "
<< use.getOwner()->getName().getStringRef() << "\n");
usesWrite.insert(&use);
}
}
}
}
// Additionally: consider writes to a value that aliases rootRead and belongs
// to opToBufferize. This simulates that opToBufferize bufferizes inplace.
for (OpOperand &use : opToBufferize->getOpOperands()) {
if (aliasInfo.isEquivalent(rootRead, use.get()) &&
bufferizesToMemoryWrite(use)) {
LDBG("------------bufferizesToMemoryWrite: "
<< use.getOwner()->getName().getStringRef() << "\n");
usesWrite.insert(&use);
}
}
Operation *opToBufferize, DenseSet<OpOperand *> &usesRead,
DenseSet<OpOperand *> &usesWrite, const DominanceInfo &domInfo) const {
for (OpOperand *uRead : usesRead) {
Operation *aliasingReadOp = uRead->getOwner();
LDBG("----++++aliasRead -> #"
@@ -1061,7 +1024,8 @@ bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference(
// At this point, aliasingWriteOp properly dominates aliasingReadOp or
// there is no clear dominance and we need to be conservative.
LDBG("---->found RaW interference between:\n");
LDBG(" Source value -> " << printValueInfo(rootRead) << '\n');
LDBG(" OpToBufferize -> " << printOperationInfo(opToBufferize)
<< '\n');
LDBG(" Interfering write -> #"
<< uWrite->getOperandNumber() << ":"
<< printOperationInfo(aliasingWriteOp) << '\n');
@@ -1073,7 +1037,6 @@ bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference(
LDBG("---->clobbered! -> skip\n");
continue;
}
LDBG("---->not clobbered -> found an interference\n");
return true;
}
@@ -1082,6 +1045,111 @@ bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference(
return false;
}
/// Return true if it is possible to find an inplace write W among the uses of
/// aliasInfo[result], and a read R among the uses of aliasInfo[result],
/// such that W and R interfere.
/// Such a (W, R) pair is an interference to the inplace bufferization of
/// opResult when:
/// 1. R is not known to properly dominate W (i.e. the effects of the write
/// may be visible from R).
/// 2. one cannot find an intermediate clobbering write `C` to W, such that
/// C interleaved between W and R (i.e. W -> C -> R where -> denotes
/// dominance).
bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference(
OpResult result, const DominanceInfo &domInfo) const {
Optional<OpOperand *> maybeAliasingOperand = getAliasingOpOperand(result);
if (!maybeAliasingOperand)
return false;
Operation *opToBufferize = result.getDefiningOp();
Value opResult = result;
Value opOperand = (*maybeAliasingOperand)->get();
LDBG("----Start wouldCreateReadAfterWriteInterference\n");
LDBG("--------consider all aliases to root read: "
<< printValueInfo(opOperand) << "\n");
LDBG("--------consider all aliases to root write: "
<< printValueInfo(opResult) << "\n");
/// Helper function to iterate on aliases of `root` and capture the reads.
auto getAliasingReads = [&](DenseSet<OpOperand *> &res, Value root) {
for (Value alias : getAliases(root)) {
for (auto &use : alias.getUses()) {
// Read to a value that aliases root.
if (bufferizesToMemoryRead(use)) {
LDBG("------------bufferizesToMemoryRead: "
<< use.getOwner()->getName().getStringRef() << "\n");
res.insert(&use);
}
}
}
};
/// Helper function to iterate on aliases of `root` and capture the writes.
auto getAliasingInplaceWrites = [&](DenseSet<OpOperand *> &res, Value root) {
for (Value alias : getAliases(root)) {
for (auto &use : alias.getUses()) {
// Inplace write to a value that aliases root.
if (bufferizesToMemoryWrite(use, InPlaceSpec::True)) {
LDBG("------------bufferizesToMemoryWrite: "
<< use.getOwner()->getName().getStringRef() << "\n");
res.insert(&use);
}
}
}
};
// Check if we can find any interference between reads to aliases[`opOperand`]
// and writes to aliases[`opResult`]. This handles the case:
//
// ```
// %0 = op_to_bufferize_maybe_inplace(%1)
// %2 = some_alias(%0)
// inplace_write(%2)
// %3 = some_alias(%1)
// read(%3)
// ```
DenseSet<OpOperand *> usesRead, usesWrite;
LDBG("--------\n");
LDBG("--------Test reads(opOperand) vs writes(opResult)\n");
getAliasingReads(usesRead, opOperand);
getAliasingInplaceWrites(usesWrite, opResult);
// Additionally, `result` is not yet bufferized and we need to check for
// interferences as if it were bufferized inplace: add `maybeAliasingOperand`
// if it is a write. This handles the case:
//
// ```
// %0 = op_to_bufferize_maybe_inplace(%1)
// %2 = some_alias(%1)
// read(%2)
// ```
if (bufferizesToMemoryWrite(**maybeAliasingOperand))
usesWrite.insert(*maybeAliasingOperand);
if (wouldCreateReadAfterWriteInterference(opToBufferize, usesRead, usesWrite,
domInfo))
return true;
// Check if we can find any interference between writes to
// aliases[`opOperand`] and reads to aliases[`opResult`]. This handles the
// case:
//
// ```
// %0 = op_to_bufferize_maybe_inplace(%1)
// %2 = some_alias(%1)
// inplace_write(%2)
// %3 = some_alias(%0)
// read(%3)
// ```
LDBG("--------\n");
LDBG("--------Test reads(opResult) vs writes(opOperand)\n");
usesRead.clear();
usesWrite.clear();
getAliasingReads(usesRead, opResult);
getAliasingInplaceWrites(usesWrite, opOperand);
return wouldCreateReadAfterWriteInterference(opToBufferize, usesRead,
usesWrite, domInfo);
}
/// Return true if the source of a `insertSliceOp` bufferizes to an
/// equivalent ExtractSliceOp that bufferizes inplace.
bool BufferizationAliasInfo::isSourceEquivalentToAMatchingInplaceExtractSliceOp(


@@ -0,0 +1,630 @@
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=test-analysis-only -split-input-file | FileCheck %s
/// All combinations of matmul(fill(extract(init_tensor)), fill(extract(%init_tensor)), %arg2)
/// These should all be inplaceable except the first op.
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_1234(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_1243(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_1324(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_1342(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_1423(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_1432(%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_2134(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_2143(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_2314(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_2341(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_2413(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_2431(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_3124(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_3142(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_3214(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_3241(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_3412(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_3421(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_4123(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_4132(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_4213(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_4231(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_4312(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
// -----
// CHECK-LABEL: func @fill_extract_matmul_
func @fill_extract_matmul_4321(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
%cst_0 = constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}


@@ -639,7 +639,7 @@ builtin.func @matmul_on_tensors(
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%8 = linalg.fill(%cst_0, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%11 = linalg.fill(%cst_1, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
@@ -673,9 +673,9 @@ builtin.func @matmul_on_tensors(
%7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_results_attr__ = ["false"]
// CHECK-SAME: {__inplace_results_attr__ = ["true"]
%8 = linalg.fill(%cst_0, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
%10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
@@ -683,7 +683,7 @@ builtin.func @matmul_on_tensors(
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_results_attr__ = ["false"]
// CHECK-SAME: {__inplace_results_attr__ = ["true"]
%11 = linalg.fill(%cst_1, %7) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
%12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
%13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>