[mlir][linalg][bufferize][NFC] Analyze OpOperands instead of OpResults

With this change, the analysis takes a look at OpOperands instead of OpResults. OpOperands can bufferize out-of-place (even if they have no aliasing OpResults). The analysis no longer cares about OpResults.

Previously, only OpResults could bufferize out-of-place, so OpOperands that have no aliasing OpResults were never copied by Comprehensive Bufferize. This does not fit well with the new CallOp bufferization that is introduced in a subsequent change. In essence, called FuncOps can then be treated as "black boxes" that may read/write to any bbArg, even if they do not return anything.

Differential Revision: https://reviews.llvm.org/D115706
This commit is contained in:
Matthias Springer 2022-01-08 00:56:13 +09:00
parent e56a9c9b5b
commit 8e2b6aac32
11 changed files with 527 additions and 467 deletions

View File

@ -204,10 +204,10 @@ public:
/// Set the inPlace bufferization spec to true.
/// Merge result's and operand's aliasing sets and iterate to a fixed point.
void bufferizeInPlace(OpResult result, OpOperand &operand);
void bufferizeInPlace(OpOperand &operand, BufferizationState &state);
/// Set the inPlace bufferization spec to false.
void bufferizeOutOfPlace(OpResult result);
void bufferizeOutOfPlace(OpOperand &operand);
/// Return true if `v1` and `v2` bufferize to equivalent buffers.
bool areEquivalentBufferizedValues(Value v1, Value v2) const {
@ -234,10 +234,10 @@ public:
void applyOnAliases(Value v, function_ref<void(Value)> fun) const;
/// Mark a value as in-place bufferized.
void markInPlace(OpResult v) { inplaceBufferized.insert(v); }
void markInPlace(OpOperand &o) { inplaceBufferized.insert(&o); }
/// Return `true` if a value was marked as in-place bufferized.
bool isInPlace(OpResult opResult) const;
bool isInPlace(OpOperand &opOperand) const;
private:
/// llvm::EquivalenceClasses wants comparable elements. This comparator uses
@ -255,7 +255,7 @@ private:
EquivalenceClassRangeType getAliases(Value v) const;
/// Set of all OpResults that were decided to bufferize in-place.
llvm::DenseSet<OpResult> inplaceBufferized;
llvm::DenseSet<OpOperand *> inplaceBufferized;
/// Auxiliary structure to store all the values a given value may alias with.
/// Alias information is "may be" conservative: In the presence of branches, a
@ -382,7 +382,7 @@ public:
Value lookupBuffer(RewriterBase &rewriter, Value tensor) const;
/// Return `true` if the given OpResult has been decided to bufferize inplace.
bool isInPlace(OpResult opResult) const;
bool isInPlace(OpOperand &opOperand) const;
/// Return the result buffer (memref) for a given OpResult (tensor). Allocate
/// a new buffer and copy over data from the existing buffer if out-of-place

View File

@ -104,18 +104,14 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> {
>,
InterfaceMethod<
/*desc=*/[{
Return `true` if the given OpResult must bufferize in-place with its
corresponding aliasing OpOperand. Alias sets and inplace attributes
will be set up accordingly before making any other bufferization
decisions. This method will never be called on OpResults that do not
have a tensor type.
Note: This method may not return `true` if the given OpResult does not
have an aliasing OpOperand.
Return `true` if the given OpOperand must bufferize in-place. Alias
sets and inplace attributes will be set up accordingly before making
any other bufferization decisions. This method will never be called on
OpOperands that do not have a tensor type.
}],
/*retType=*/"bool",
/*methodName=*/"mustBufferizeInPlace",
/*args=*/(ins "OpResult":$opResult,
/*args=*/(ins "OpOperand &":$opOperand,
"const BufferizationState &":$state),
/*methodBody=*/"",
/*defaultImplementation=*/[{

View File

@ -129,27 +129,22 @@ void BufferizationAliasInfo::insertNewBufferEquivalence(Value newValue,
}
/// Return `true` if a value was marked as in-place bufferized.
bool BufferizationAliasInfo::isInPlace(OpResult opResult) const {
return inplaceBufferized.contains(opResult);
bool BufferizationAliasInfo::isInPlace(OpOperand &operand) const {
return inplaceBufferized.contains(&operand);
}
/// Set the inPlace bufferization spec to true.
void BufferizationAliasInfo::bufferizeInPlace(OpResult result,
OpOperand &operand) {
LLVM_DEBUG(llvm::dbgs() << "bufferizeInPlace: ");
LLVM_DEBUG(result.print(llvm::dbgs()));
markInPlace(result);
aliasInfo.unionSets(result, operand.get());
void BufferizationAliasInfo::bufferizeInPlace(OpOperand &operand,
BufferizationState &state) {
markInPlace(operand);
if (OpResult result = state.getAliasingOpResult(operand))
aliasInfo.unionSets(result, operand.get());
}
/// Set the inPlace bufferization spec to false.
void BufferizationAliasInfo::bufferizeOutOfPlace(OpResult result) {
LLVM_DEBUG(llvm::dbgs() << "bufferizeOutOfPlace: ");
LLVM_DEBUG(result.print(llvm::dbgs()));
if (inplaceBufferized.contains(result))
inplaceBufferized.erase(result);
void BufferizationAliasInfo::bufferizeOutOfPlace(OpOperand &operand) {
assert(!inplaceBufferized.contains(&operand) &&
"OpOperand was already decided to bufferize inplace");
}
/// Apply `fun` to all the members of the equivalence class of `v`.
@ -339,16 +334,13 @@ mlir::linalg::comprehensive_bufferize::BufferizationState::BufferizationState(
op->walk([&](BufferizableOpInterface bufferizableOp) {
if (!options.isOpAllowed(bufferizableOp))
return WalkResult::skip();
for (OpResult opResult : bufferizableOp->getOpResults()) {
if (opResult.getType().isa<TensorType>())
if (bufferizableOp.mustBufferizeInPlace(opResult, *this)) {
SmallVector<OpOperand *> operands =
bufferizableOp.getAliasingOpOperand(opResult, *this);
assert(!operands.empty() &&
"expected that OpResult has aliasing OpOperand");
for (OpOperand *operand : operands)
aliasInfo.unionAliasSets(operand->get(), opResult);
aliasInfo.markInPlace(opResult);
for (OpOperand &opOperand : bufferizableOp->getOpOperands()) {
if (opOperand.get().getType().isa<TensorType>())
if (bufferizableOp.mustBufferizeInPlace(opOperand, *this)) {
if (OpResult opResult =
bufferizableOp.getAliasingOpResult(opOperand, *this))
aliasInfo.unionAliasSets(opOperand.get(), opResult);
aliasInfo.markInPlace(opOperand);
}
}
return WalkResult::advance();
@ -380,7 +372,7 @@ mlir::linalg::comprehensive_bufferize::BufferizationState::getResultBuffer(
return FailureOr<Value>(op->emitError("result buffer is ambiguous"));
// If bufferizing out-of-place, allocate a new buffer.
if (!aliasInfo.isInPlace(result)) {
if (!aliasInfo.isInPlace(*opOperand)) {
// Ops with multiple aliasing operands can currently not bufferize
// out-of-place.
assert(
@ -624,8 +616,8 @@ Value mlir::linalg::comprehensive_bufferize::BufferizationState::lookupBuffer(
}
bool mlir::linalg::comprehensive_bufferize::BufferizationState::isInPlace(
OpResult opResult) const {
return aliasInfo.isInPlace(opResult);
OpOperand &opOperand) const {
return aliasInfo.isInPlace(opOperand);
}
MemRefType mlir::linalg::comprehensive_bufferize::getContiguousMemRefType(

View File

@ -48,6 +48,19 @@ struct ToMemrefOpInterface
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// It is unknown whether the resulting MemRef will be written or not.
return true;
}
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// ToMemrefOps always bufferize inplace.
// TODO: Remove ToMemrefOps from the analysis.
return true;
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
return OpResult();

View File

@ -74,24 +74,25 @@ static bool isaTensor(Type t) { return t.isa<TensorType>(); }
//===----------------------------------------------------------------------===//
/// Attribute marker to specify op results that can be bufferized inPlace.
constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_results_attr__";
constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_operands_attr__";
/// Mark whether OpResult can actually be bufferized inplace.
/// If `inPlace` is `true`, the use-def chain analysis has guaranteed that no
/// subsequent write would occur to the bufferized tensor value (i.e. the result
/// can be bufferized inplace).
static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
if (!opResult)
return;
Operation *op = opResult.getOwner();
/// Mark whether OpOperand will be bufferized inplace.
static void setInPlaceOpOperand(OpOperand &opOperand, bool inPlace) {
Operation *op = opOperand.getOwner();
auto attr =
op->getAttr(kInPlaceResultsAttrName).dyn_cast_or_null<ArrayAttr>();
SmallVector<StringRef> inPlaceVector =
attr ? SmallVector<StringRef>(
llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()))
: SmallVector<StringRef>(op->getNumResults(), "false");
inPlaceVector[opResult.getResultNumber()] = inPlace ? "true" : "false";
SmallVector<StringRef> inPlaceVector;
if (attr) {
inPlaceVector = SmallVector<StringRef>(
llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()));
} else {
inPlaceVector = SmallVector<StringRef>(op->getNumOperands(), "none");
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>())
inPlaceVector[opOperand.getOperandNumber()] = "false";
}
inPlaceVector[opOperand.getOperandNumber()] = inPlace ? "true" : "false";
op->setAttr(kInPlaceResultsAttrName,
OpBuilder(op).getStrArrayAttr(inPlaceVector));
}
@ -104,21 +105,11 @@ static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
static bool isInplaceMemoryWrite(OpOperand &opOperand,
const BufferizationAliasInfo &aliasInfo,
BufferizationState &state) {
// The analysis does not know what happens to the result of a ToMemrefOp, so
// we assume that it is written to.
// TODO: This is a conservative implementation. This rule will have to be
// relaxed for partial bufferization.
if (isa<bufferization::ToMemrefOp>(opOperand.getOwner()))
return true;
// OpOperands without an aliasing OpResult do not write.
OpResult opResult = state.getAliasingOpResult(opOperand);
if (!opResult)
return false;
// OpOperands that do not bufferize to a memory write do not write in-place.
if (!state.bufferizesToMemoryWrite(opOperand))
return false;
// Check current bufferization decisions.
return aliasInfo.isInPlace(opResult);
return aliasInfo.isInPlace(opOperand);
}
/// Return true if, under current bufferization decisions, the buffer of `value`
@ -128,8 +119,8 @@ static bool aliasesNonWritableBuffer(Value value,
BufferizationState &state) {
bool foundNonWritableBuffer = false;
aliasInfo.applyOnAliases(value, [&](Value v) {
// Query BufferizableOpInterface to see if the OpResult is writable.
// TODO: Out-of-place bufferized OpResult could be considered writable.
// Query BufferizableOpInterface to see if the value is writable.
// TODO: Out-of-place bufferized value could be considered writable.
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(v))
if (bufferizableOp && bufferizableOp.isWritable(v, state))
return;
@ -309,8 +300,8 @@ static bool hasReadAfterWriteInterference(
return false;
}
/// Return true if bufferizing result inplace would create a conflict. A read R
/// and a write W of the same alias set is a conflict if inplace bufferization
/// Return true if bufferizing `operand` inplace would create a conflict. A read
/// R and a write W of the same alias set is a conflict if inplace bufferization
/// of W changes the value read by R to a value different from the one that
/// would be expected by tracing back R's origin through SSA use-def chains.
/// A conflict can only be introduced by a new alias and/or an inplace
@ -338,21 +329,10 @@ static bool hasReadAfterWriteInterference(
/// Note: If `checkConsistencyOnly`, this function may be called with a null
/// OpResult. In that case, only the consistency of bufferization decisions
/// involving aliases of the given OpOperand are checked.
bool wouldCreateReadAfterWriteInterference(
OpOperand &operand, OpResult result, const DominanceInfo &domInfo,
BufferizationState &state, const BufferizationAliasInfo &aliasInfo,
static bool wouldCreateReadAfterWriteInterference(
OpOperand &operand, const DominanceInfo &domInfo, BufferizationState &state,
const BufferizationAliasInfo &aliasInfo,
bool checkConsistencyOnly = false) {
#ifndef NDEBUG
if (result) {
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(result);
assert(llvm::find(opOperands, &operand) != opOperands.end() &&
"operand and result do not match");
} else {
assert(checkConsistencyOnly &&
"result not provided, can only check consistency");
}
#endif // NDEBUG
// Helper function to iterate on aliases of `root` and capture the reads.
auto getAliasingReads = [&](DenseSet<OpOperand *> &res, Value root) {
aliasInfo.applyOnAliases(root, [&](Value alias) {
@ -376,11 +356,11 @@ bool wouldCreateReadAfterWriteInterference(
// Collect reads and writes of all aliases of OpOperand and OpResult.
DenseSet<OpOperand *> usesRead, usesWrite;
getAliasingReads(usesRead, operand.get());
if (result)
getAliasingReads(usesRead, result);
getAliasingInplaceWrites(usesWrite, operand.get());
if (result)
if (OpResult result = state.getAliasingOpResult(operand)) {
getAliasingReads(usesRead, result);
getAliasingInplaceWrites(usesWrite, result);
}
if (!checkConsistencyOnly && state.bufferizesToMemoryWrite(operand))
usesWrite.insert(&operand);
@ -388,18 +368,12 @@ bool wouldCreateReadAfterWriteInterference(
aliasInfo);
}
/// Return true if bufferizing `opOperand` inplace with `opResult` would create
/// a write to a non-writable buffer.
/// Return true if bufferizing `opOperand` inplace would create a write to a
/// non-writable buffer.
static bool
wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand,
const BufferizationAliasInfo &aliasInfo,
BufferizationState &state) {
#ifndef NDEBUG
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(opResult);
assert(llvm::find(opOperands, &opOperand) != opOperands.end() &&
"operand and result do not match");
#endif // NDEBUG
// Certain buffers are not writeable:
// 1. A function bbArg that is not inplaceable or
// 2. A constant op.
@ -409,9 +383,12 @@ wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
return false;
// This is a problem only if the buffer is written to via some alias.
bool hasWrite = aliasesInPlaceWrite(opResult, aliasInfo, state) ||
aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) ||
bool hasWrite = aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) ||
state.bufferizesToMemoryWrite(opOperand);
if (OpResult opResult = state.getAliasingOpResult(opOperand))
hasWrite |= aliasesInPlaceWrite(opResult, aliasInfo, state);
return hasWrite;
}
@ -419,30 +396,23 @@ wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
// Bufferization analyses.
//===----------------------------------------------------------------------===//
/// Determine if `operand` can be bufferized in-place with `result`.
/// Determine if `operand` can be bufferized in-place.
static LogicalResult bufferizableInPlaceAnalysisImpl(
OpOperand &operand, OpResult result, BufferizationAliasInfo &aliasInfo,
OpOperand &operand, BufferizationAliasInfo &aliasInfo,
BufferizationState &state, const DominanceInfo &domInfo) {
#ifndef NDEBUG
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(result);
assert(llvm::find(opOperands, &operand) != opOperands.end() &&
"operand and result do not match");
#endif // NDEBUG
bool foundInterference =
wouldCreateWriteToNonWritableBuffer(operand, result, aliasInfo, state) ||
wouldCreateReadAfterWriteInterference(operand, result, domInfo, state,
aliasInfo);
wouldCreateWriteToNonWritableBuffer(operand, aliasInfo, state) ||
wouldCreateReadAfterWriteInterference(operand, domInfo, state, aliasInfo);
if (foundInterference)
aliasInfo.bufferizeOutOfPlace(result);
aliasInfo.bufferizeOutOfPlace(operand);
else
aliasInfo.bufferizeInPlace(result, operand);
aliasInfo.bufferizeInPlace(operand, state);
return success();
}
/// Analyze the `ops` to determine which OpResults are inplaceable. Walk ops in
/// Analyze the `ops` to determine which OpOperands are inplaceable. Walk ops in
/// reverse and bufferize ops greedily. This is a good starter heuristic.
///
/// Even if an op does not read or write, it may still create an alias when
@ -478,11 +448,9 @@ static LogicalResult inPlaceAnalysis(SmallVector<Operation *> &ops,
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>())
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
if (OpResult opResult =
bufferizableOp.getAliasingOpResult(opOperand, state))
if (failed(bufferizableInPlaceAnalysisImpl(
opOperand, opResult, aliasInfo, state, domInfo)))
return failure();
if (failed(bufferizableInPlaceAnalysisImpl(opOperand, aliasInfo,
state, domInfo)))
return failure();
return success();
}
@ -520,15 +488,12 @@ static void equivalenceAnalysis(SmallVector<Operation *> &ops,
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
for (OpResult opResult : op->getOpResults())
if (opResult.getType().isa<TensorType>())
if (aliasInfo.isInPlace(opResult)) {
SmallVector<OpOperand *> opOperands =
bufferizableOp.getAliasingOpOperand(opResult, state);
if (!opOperands.empty())
for (OpOperand *opOperand :
bufferizableOp.getAliasingOpOperand(opResult, state))
if (state.isInPlace(*opOperand))
if (bufferizableOp.bufferRelation(opResult, aliasInfo, state) ==
BufferRelation::Equivalent)
for (OpOperand *opOperand : opOperands)
aliasInfo.unionEquivalenceClasses(opResult, opOperand->get());
}
aliasInfo.unionEquivalenceClasses(opResult, opOperand->get());
}
/// Analyze equivalence of tied OpResult/OpOperand pairs of all ops contained
@ -559,15 +524,12 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
if (auto bufferizableOp = options.dynCastBufferizableOp(op))
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>()) {
OpResult opResult =
bufferizableOp.getAliasingOpResult(opOperand, state);
if (wouldCreateReadAfterWriteInterference(
opOperand, opResult, domInfo, state, aliasInfo,
opOperand, domInfo, state, aliasInfo,
/*checkConsistencyOnly=*/true)) {
// This error can happen for two reasons. Either the input IR
// already has a read-after-write conflict. Or certain
// "mustBufferizeInPlace" interface methods are implemented
// incorrectly.
// This error can happen if certain "mustBufferizeInPlace" interface
// methods are implemented incorrectly, such that the IR already has
// a RaW conflict before making any bufferization decisions.
inconsistentOp = op;
return WalkResult::interrupt();
}
@ -576,10 +538,6 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
});
if (walkResult.wasInterrupted())
// This can currently happen in one situation: When a tensor is passed into
// a ToMemrefOp and read by another op consecutively. ToMemrefOps are
// currently handled conservatively. Once a tensor is passed into a
// ToMemrefOp, it may longer be read.
return inconsistentOp->emitError("input IR has RaW conflict");
return success();
}
@ -587,11 +545,13 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
/// Annotate the IR with the result of the analysis. For testing/debugging only.
static void
annotateOpsWithBufferizationMarkers(Operation *op,
const BufferizationAliasInfo &aliasInfo) {
const BufferizationAliasInfo &aliasInfo,
BufferizationState &state) {
op->walk([&](Operation *op) {
for (OpResult opResult : op->getResults())
if (opResult.getType().isa<TensorType>())
setInPlaceOpResult(opResult, aliasInfo.isInPlace(opResult));
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>())
setInPlaceOpOperand(opOperand, aliasInfo.isInPlace(opOperand));
});
}
@ -688,7 +648,7 @@ LogicalResult mlir::linalg::comprehensive_bufferize::runComprehensiveBufferize(
// Annotate operations if we only want to report the analysis.
if (options.testAnalysisOnly) {
annotateOpsWithBufferizationMarkers(op, aliasInfo);
annotateOpsWithBufferizationMarkers(op, aliasInfo, state);
return success();
}

View File

@ -397,6 +397,14 @@ struct YieldOpInterface
return OpResult();
}
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// Yield operands always bufferize inplace. Otherwise, an alloc + copy
// may be generated inside the block. We should not return/yield allocations
// when possible.
return true;
}
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
auto yieldOp = cast<linalg::YieldOp>(op);
@ -447,22 +455,26 @@ mlir::linalg::comprehensive_bufferize::linalg_ext::InitTensorEliminationStep::
WalkResult status = op->walk([&](Operation *op) {
for (OpOperand &operand : op->getOpOperands()) {
// Skip operands that do not bufferize inplace.
if (!aliasInfo.isInPlace(operand))
continue;
// Is this a matching OpOperand?
if (!anchorMatchFunc(operand))
continue;
SetVector<Value> maybeInitTensor =
state.findValueInReverseUseDefChain(operand.get(), [&](Value val) {
// Continue traversal until this function returns true.
OpResult opResult = val.dyn_cast<OpResult>();
if (!opResult)
return true;
if (!aliasInfo.isInPlace(opResult))
return true;
// Only equivalent tensors are supported at the moment.
// TODO: Support cases such as extract_slice(init_tensor).
SmallVector<OpOperand *> opOperands =
state.getAliasingOpOperand(opResult);
if (!llvm::all_of(opOperands, [&](OpOperand *operand) {
return aliasInfo.isInPlace(*operand);
}))
return true;
// Only equivalent tensors are supported at the moment.
// TODO: Support cases such as extract_slice(init_tensor)
return !llvm::all_of(opOperands, [&](OpOperand *operand) {
return aliasInfo.areEquivalentBufferizedValues(operand->get(),
opResult);
@ -542,7 +554,7 @@ LogicalResult mlir::linalg::comprehensive_bufferize::linalg_ext::
if (!insertSliceOp)
return false;
// Only inplace bufferized InsertSliceOps are eligible.
if (!aliasInfo.isInPlace(insertSliceOp->getOpResult(0)))
if (!aliasInfo.isInPlace(insertSliceOp->getOpOperand(1) /*dest*/))
return false;
return &operand == &insertSliceOp->getOpOperand(0) /*source*/;
},

View File

@ -590,6 +590,11 @@ struct CallOpInterface
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
return false;
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// CallOpInterface is special, it needs to wait for the callee to be

View File

@ -42,14 +42,6 @@ struct ExecuteRegionOpInterface
return {&yieldOp->getOpOperand(resultNum)};
}
bool mustBufferizeInPlace(Operation *op, OpResult opResult,
const BufferizationState &state) const {
// ExecuteRegionOp results always bufferize in-place. Since they have no
// OpOperands, they are mostly ignored by the analysis once alias sets are
// set up.
return true;
}
// TODO: For better bufferization results, this could return `true` only if
// there is a memory write in the region.
bool isMemoryWrite(Operation *op, OpResult opResult,
@ -129,13 +121,6 @@ struct IfOpInterface
return true;
}
bool mustBufferizeInPlace(Operation *op, OpResult opResult,
const BufferizationState &state) const {
// IfOp results always bufferize in-place. Since they have no OpOperands,
// they are mostly ignored by the analysis once alias sets are set up.
return true;
}
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
auto ifOp = cast<scf::IfOp>(op);
@ -430,9 +415,21 @@ struct YieldOpInterface
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
if (isa<scf::IfOp>(op->getParentOp()))
return op->getParentOp()->getResult(opOperand.getOperandNumber());
if (isa<scf::ExecuteRegionOp>(op->getParentOp()))
return op->getParentOp()->getResult(opOperand.getOperandNumber());
return OpResult();
}
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// Yield operands always bufferize inplace. Otherwise, an alloc + copy
// may be generated inside the block. We should not return/yield allocations
// when possible.
return true;
}
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
auto yieldOp = cast<scf::YieldOp>(op);

View File

@ -149,7 +149,7 @@ struct ExtractSliceOpInterface
extractSliceOp.result().getType().cast<RankedTensorType>();
// If not inplaceable, alloc.
bool inplace = state.isInPlace(extractSliceOp->getResult(0));
bool inplace = state.isInPlace(extractSliceOp->getOpOperand(0));
Value alloc;
if (!inplace) {
FailureOr<Value> allocOrFailure =

View File

@ -17,14 +17,15 @@ func @fill_extract_matmul_1234(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -43,14 +44,15 @@ func @fill_extract_matmul_1243(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -68,14 +70,15 @@ func @fill_extract_matmul_1324(%arg0: tensor<518x518xf32> {linalg.buffer_layout
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -93,14 +96,15 @@ func @fill_extract_matmul_1342(%arg0: tensor<518x518xf32> {linalg.buffer_layout
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -118,15 +122,15 @@ func @fill_extract_matmul_1423(%arg0: tensor<518x518xf32> {linalg.buffer_layout
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -144,15 +148,15 @@ func @fill_extract_matmul_1432(%arg0: tensor<518x518xf32> {linalg.buffer_layout
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -171,14 +175,15 @@ func @fill_extract_matmul_2134(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -197,14 +202,15 @@ func @fill_extract_matmul_2143(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -223,15 +229,15 @@ func @fill_extract_matmul_2314(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -250,14 +256,15 @@ func @fill_extract_matmul_2341(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -276,14 +283,15 @@ func @fill_extract_matmul_2413(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -302,14 +310,15 @@ func @fill_extract_matmul_2431(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -328,14 +337,15 @@ func @fill_extract_matmul_3124(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -354,14 +364,15 @@ func @fill_extract_matmul_3142(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -380,14 +391,15 @@ func @fill_extract_matmul_3214(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -406,14 +418,15 @@ func @fill_extract_matmul_3241(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -432,14 +445,15 @@ func @fill_extract_matmul_3412(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -458,14 +472,15 @@ func @fill_extract_matmul_3421(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -484,15 +499,15 @@ func @fill_extract_matmul_4123(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -511,14 +526,15 @@ func @fill_extract_matmul_4132(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -537,15 +553,15 @@ func @fill_extract_matmul_4213(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -564,15 +580,15 @@ func @fill_extract_matmul_4231(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -591,14 +607,15 @@ func @fill_extract_matmul_4312(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -617,14 +634,15 @@ func @fill_extract_matmul_4321(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}