[mlir][linalg][bufferize][NFC] Move tensor interface impl to new build target

This makes ComprehensiveBufferize entirely independent of the tensor dialect.
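For users of ComprehensiveBufferize that bufferize tensor ops, the tensor_ext
external models must now be registered explicitly; the core registration hook
no longer covers them. A minimal sketch (the pass class is hypothetical; the
two registration functions are the ones touched by this change):

  #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h"
  #include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"

  using namespace mlir;
  using namespace mlir::linalg;

  void MyBufferizePass::getDependentDialects(DialectRegistry &registry) const {
    // Core external models (scf, std, vector, ...). After this change, tensor
    // ops are no longer registered here.
    comprehensive_bufferize::registerBufferizableOpInterfaceExternalModels(
        registry);
    // The tensor dialect models now come from their own registration hook
    // (and build target).
    comprehensive_bufferize::tensor_ext::
        registerBufferizableOpInterfaceExternalModels(registry);
  }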

Differential Revision: https://reviews.llvm.org/D114217
Matthias Springer 2021-11-24 18:20:00 +09:00
parent 8ef460fc51
commit bb273a35a0
10 changed files with 555 additions and 462 deletions


@@ -322,6 +322,26 @@ struct PostAnalysisStep {
SmallVector<Operation *> &newOps) = 0;
};
/// Return a contiguous MemRefType (i.e. with canonical/empty layout map)
/// with the same shape as `shapedType` and the specified `layout` and
/// `memorySpace`.
MemRefType getContiguousMemRefType(ShapedType shapedType,
MemRefLayoutAttrInterface layout = {},
Attribute memorySpace = {});
/// Return a contiguous MemRefType (i.e. with canonical/empty layout map)
/// with the same shape as `type` and the specified `layout` and
/// `memorySpace` if `type` is a ranked tensor or memref type; return an
/// UnrankedMemRefType otherwise.
Type getContiguousOrUnrankedMemRefType(Type type,
MemRefLayoutAttrInterface layout = {},
Attribute memorySpace = {});
/// Return a MemRefType to which `tensorType` can be bufferized in a
/// composable fashion. The layout is the most dynamic one possible, so that
/// it canonicalizes away once bufferization is finished.
MemRefType getDynamicMemRefType(RankedTensorType tensorType,
unsigned addressSpace = 0);
} // namespace comprehensive_bufferize
} // namespace linalg
} // namespace mlir
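To make the three declarations concrete, here is what they produce for a
simple ranked case (an illustrative sketch, not part of the change; the MLIR
types in the comments assume the default memory space and the canonical form
produced by makeStridedLinearLayoutMap):

  // Let `tensorType` be the RankedTensorType for tensor<4x?xf32>.
  MemRefType contiguous = getContiguousMemRefType(tensorType);
  // contiguous: memref<4x?xf32> (identity layout map)
  Type unranked = getContiguousOrUnrankedMemRefType(
      UnrankedTensorType::get(FloatType::getF32(ctx)));
  // unranked: memref<*xf32>
  MemRefType dynamic = getDynamicMemRefType(tensorType);
  // dynamic: memref<4x?xf32,
  //   affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>>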


@@ -1,3 +1,11 @@
//===- LinalgInterfaceImpl.h - Linalg Impl. of BufferizableOpInterface ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_LINALG_INTERFACE_IMPL_H
#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_LINALG_INTERFACE_IMPL_H


@@ -0,0 +1,27 @@
//===- TensorInterfaceImpl.h - Tensor Impl. of BufferizableOpInterface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSOR_INTERFACE_IMPL_H
#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSOR_INTERFACE_IMPL_H
namespace mlir {
class DialectRegistry;
namespace linalg {
namespace comprehensive_bufferize {
namespace tensor_ext {
void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
} // namespace tensor_ext
} // namespace comprehensive_bufferize
} // namespace linalg
} // namespace mlir
#endif // MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSOR_INTERFACE_IMPL_H


@@ -12,6 +12,7 @@
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "llvm/Support/Debug.h"
@@ -528,3 +529,31 @@ void mlir::linalg::comprehensive_bufferize::BufferizationState::
op->erase();
obsoleteOps.clear();
}
MemRefType mlir::linalg::comprehensive_bufferize::getContiguousMemRefType(
ShapedType shapedType, MemRefLayoutAttrInterface layout,
Attribute memorySpace) {
return MemRefType::get(shapedType.getShape(), shapedType.getElementType(),
layout, memorySpace);
}
Type mlir::linalg::comprehensive_bufferize::getContiguousOrUnrankedMemRefType(
Type type, MemRefLayoutAttrInterface layout, Attribute memorySpace) {
if (type.isa<RankedTensorType, MemRefType>())
return getContiguousMemRefType(type.cast<ShapedType>(), layout,
memorySpace);
assert(!layout && "expected empty layout with UnrankedMemRefType");
return UnrankedMemRefType::get(getElementTypeOrSelf(type), memorySpace);
}
MemRefType mlir::linalg::comprehensive_bufferize::getDynamicMemRefType(
RankedTensorType tensorType, unsigned addressSpace) {
// TODO: Address space decisions should be connected with the actual alloc.
int64_t dynamicOffset = ShapedType::kDynamicStrideOrOffset;
SmallVector<int64_t> dynamicStrides(tensorType.getRank(),
ShapedType::kDynamicStrideOrOffset);
AffineMap stridedLayout = makeStridedLinearLayoutMap(
dynamicStrides, dynamicOffset, tensorType.getContext());
return MemRefType::get(tensorType.getShape(), tensorType.getElementType(),
stridedLayout, addressSpace);
}


@@ -2,6 +2,7 @@ set(LLVM_OPTIONAL_SOURCES
BufferizableOpInterface.cpp
ComprehensiveBufferize.cpp
LinalgInterfaceImpl.cpp
TensorInterfaceImpl.cpp
)
add_mlir_dialect_library(MLIRBufferizableOpInterface
@@ -25,6 +26,16 @@ add_mlir_dialect_library(MLIRLinalgBufferizableOpInterfaceImpl
MLIRTensor
)
add_mlir_dialect_library(MLIRTensorBufferizableOpInterfaceImpl
TensorInterfaceImpl.cpp
LINK_LIBS PUBLIC
MLIRBufferizableOpInterface
MLIRIR
MLIRMemRef
MLIRTensor
)
add_mlir_dialect_library(MLIRComprehensiveBufferize
ComprehensiveBufferize.cpp
@@ -37,6 +48,5 @@ add_mlir_dialect_library(MLIRComprehensiveBufferize
MLIRSCF
MLIRStandard
MLIRStandardOpsTransforms
MLIRTensor
MLIRVector
)
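With MLIRTensor dropped from MLIRComprehensiveBufferize, targets that need the
tensor bufferization patterns link the new library directly, as
MLIRLinalgTransforms does further down in this diff. A hypothetical
out-of-tree consumer would do the same:

  add_mlir_library(MyComprehensiveBufferizePass
    MyComprehensiveBufferizePass.cpp

    LINK_LIBS PUBLIC
    MLIRComprehensiveBufferize
    MLIRTensorBufferizableOpInterfaceImpl
  )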


@@ -587,45 +587,6 @@ getEquivalentEnclosingFuncBBArg(Value v,
// Bufferization-specific MemRefType support.
//===----------------------------------------------------------------------===//
/// Return a contiguous MemRefType (i.e. with canonical/empty layout map)
/// with the same shape as `shapedType` and specified `layout` and
/// `addressSpace`.
static MemRefType getContiguousMemRefType(ShapedType shapedType,
MemRefLayoutAttrInterface layout = {},
Attribute memorySpace = {}) {
return MemRefType::get(shapedType.getShape(), shapedType.getElementType(),
layout, memorySpace);
}
/// Return a contiguous MemRefType (i.e. with canonical/empty layout map)
/// with the same shape as `shapedType` and specified `layout` and
/// `addressSpace` or an UnrankedMemRefType otherwise.
static Type
getContiguousOrUnrankedMemRefType(Type type,
MemRefLayoutAttrInterface layout = {},
Attribute memorySpace = {}) {
if (type.isa<RankedTensorType, MemRefType>())
return getContiguousMemRefType(type.cast<ShapedType>(), layout,
memorySpace);
assert(!layout && "expected empty layout with UnrankedMemRefType");
return UnrankedMemRefType::get(getElementTypeOrSelf(type), memorySpace);
}
/// Return a MemRefType to which the `tensorType` can be bufferized in a
/// composable fashion. The layout must be the most dynamic possible and
/// canonicalize away once bufferization is finished.
static MemRefType getDynamicMemRefType(RankedTensorType tensorType,
unsigned addressSpace = 0) {
// TODO: address space decisions to connect with the actual alloc.
int64_t dynamicOffset = ShapedType::kDynamicStrideOrOffset;
SmallVector<int64_t> dynamicStrides(tensorType.getRank(),
ShapedType::kDynamicStrideOrOffset);
AffineMap stridedLayout = makeStridedLinearLayoutMap(
dynamicStrides, dynamicOffset, tensorType.getContext());
return MemRefType::get(tensorType.getShape(), tensorType.getElementType(),
stridedLayout, addressSpace);
}
/// Return the FunctionType with `argumentTypes` and `resultTypes` where each
/// tensor is replaced by the corresponding buffer type.
/// In order for all the callers to agree, this *must* bufferize to the most
@@ -1965,420 +1926,6 @@ struct ReturnOpInterface
} // namespace std_ext
namespace tensor_ext {
struct CastOpInterface
: public BufferizableOpInterface::ExternalModel<CastOpInterface,
tensor::CastOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return false;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return false;
}
SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
OpResult opResult) const {
return {&op->getOpOperand(0)};
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return op->getResult(0);
}
BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
return BufferRelation::Equivalent;
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
auto castOp = cast<tensor::CastOp>(op);
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(castOp);
Value resultBuffer = getResultBuffer(b, castOp->getResult(0), state);
if (!resultBuffer)
return failure();
Type sourceType = resultBuffer.getType();
auto rankedMemRefType = sourceType.dyn_cast<MemRefType>();
auto unrankedMemRefType = sourceType.dyn_cast<UnrankedMemRefType>();
assert(rankedMemRefType || unrankedMemRefType);
Attribute memorySpace = rankedMemRefType
? rankedMemRefType.getMemorySpace()
: unrankedMemRefType.getMemorySpace();
TensorType tensorType = castOp.getResult().getType().cast<TensorType>();
MemRefLayoutAttrInterface layout =
rankedMemRefType && tensorType.isa<RankedTensorType>()
? rankedMemRefType.getLayout()
: MemRefLayoutAttrInterface();
Type memRefType = getContiguousOrUnrankedMemRefType(
castOp.getResult().getType(), layout, memorySpace);
Value res =
b.create<memref::CastOp>(castOp.getLoc(), memRefType, resultBuffer);
state.aliasInfo.insertNewBufferEquivalence(res, castOp.getResult());
state.mapBuffer(castOp.getResult(), res);
return success();
}
};
struct DimOpInterface
: public BufferizableOpInterface::ExternalModel<DimOpInterface,
tensor::DimOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return false;
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return OpResult();
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
auto dimOp = cast<tensor::DimOp>(op);
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(dimOp);
if (dimOp.source().getType().isa<RankedTensorType>()) {
Value v = state.lookupBuffer(dimOp.source());
dimOp.result().replaceAllUsesWith(
b.create<memref::DimOp>(dimOp.getLoc(), v, dimOp.index()));
}
return success();
}
};
struct ExtractSliceOpInterface
: public BufferizableOpInterface::ExternalModel<ExtractSliceOpInterface,
tensor::ExtractSliceOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return false;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return false;
}
SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
OpResult opResult) const {
return {&op->getOpOperand(0) /*source*/};
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return &opOperand == &op->getOpOperand(0) /*source*/
? op->getResult(0)
: OpResult();
}
BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
return BufferRelation::None;
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
auto extractSliceOp = cast<tensor::ExtractSliceOp>(op);
LDBG("bufferize: " << *extractSliceOp << '\n');
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(extractSliceOp);
Location loc = extractSliceOp.getLoc();
Value srcMemref = state.lookupBuffer(extractSliceOp.source());
auto srcMemrefType = srcMemref.getType().cast<MemRefType>();
auto dstTensorType =
extractSliceOp.result().getType().cast<RankedTensorType>();
// If not inplaceable, alloc.
bool inplace = state.aliasInfo.isInPlace(extractSliceOp->getResult(0));
Value alloc;
if (!inplace)
alloc = createNewAllocDeallocPairForShapedValue(
b, loc, extractSliceOp.result(), state);
// Bufferize to subview.
auto subviewMemRefType =
memref::SubViewOp::inferRankReducedResultType(
dstTensorType.getRank(), srcMemrefType,
extractSliceOp.getMixedOffsets(), extractSliceOp.getMixedSizes(),
extractSliceOp.getMixedStrides())
.cast<MemRefType>();
Value subView = b.create<memref::SubViewOp>(
loc, subviewMemRefType, srcMemref, extractSliceOp.getMixedOffsets(),
extractSliceOp.getMixedSizes(), extractSliceOp.getMixedStrides());
// Insert new alias.
state.aliasInfo.insertNewBufferAlias(subView, srcMemref);
/// If not inplaceable, copy.
if (!inplace) {
// Do not copy if the copied data is never read.
if (isValueRead(extractSliceOp.result()))
state.allocationFns.memCpyFn(b, extractSliceOp.getLoc(), subView,
alloc);
subView = alloc;
}
state.mapBuffer(extractSliceOp.result(), subView);
return success();
}
};
struct ExtractOpInterface
: public BufferizableOpInterface::ExternalModel<ExtractOpInterface,
tensor::ExtractOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return false;
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return OpResult();
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
auto extractOp = cast<tensor::ExtractOp>(op);
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(extractOp);
Location loc = extractOp.getLoc();
Value srcMemref = state.lookupBuffer(extractOp.tensor());
Value l = b.create<memref::LoadOp>(loc, srcMemref, extractOp.indices());
extractOp.replaceAllUsesWith(l);
return success();
}
};
/// Return true if the (ExtractSliceOp, InsertSliceOp) pair match (i.e.
/// equivalent operand / result and same offset/sizes/strides specification).
///
/// This is one particular type of relationship between ops on tensors that
/// reduce to an equivalence on buffers. This should be generalized and
/// exposed as interfaces on the proper types.
static bool
areEquivalentExtractSliceOps(const BufferizationAliasInfo &aliasInfo,
ExtractSliceOp st, InsertSliceOp sti) {
if (!st || !sti)
return false;
if (!aliasInfo.areEquivalentBufferizedValues(st.source(), sti.dest()))
return false;
if (!sameOffsetsSizesAndStrides(st, sti, isEqualConstantIntOrValue))
return false;
return true;
}
/// Return true if the source of a `insertSliceOp` bufferizes to an
/// equivalent ExtractSliceOp that bufferizes inplace.
static bool isSourceEquivalentToAMatchingInplaceExtractSliceOp(
const BufferizationAliasInfo &aliasInfo, InsertSliceOp insertSliceOp) {
LDBG("isSourceEquivalentToAMatchingInplaceExtractSliceOp: " << *insertSliceOp
<< '\n');
bool foundOp = false;
aliasInfo.applyOnEquivalenceClass(insertSliceOp.source(), [&](Value value) {
auto extractSliceOp = value.getDefiningOp<ExtractSliceOp>();
if (extractSliceOp &&
areEquivalentExtractSliceOps(aliasInfo, extractSliceOp,
insertSliceOp) &&
aliasInfo.isInPlace(extractSliceOp->getResult(0))) {
LDBG("\tfound: " << extractSliceOp.getOperation() << '\n');
foundOp = true;
}
});
if (!foundOp)
LDBG("\tnot equivalent\n");
return foundOp;
}
/// Return true if `value` is originating from an ExtractSliceOp that matches
/// the given InsertSliceOp.
static bool hasMatchingExtractSliceOp(const BufferizationAliasInfo &aliasInfo,
Value value, InsertSliceOp insertOp) {
auto condition = [&](Value val) {
if (auto extractOp = val.getDefiningOp<ExtractSliceOp>())
if (areEquivalentExtractSliceOps(aliasInfo, extractOp, insertOp))
return true;
return false;
};
return llvm::all_of(findValueInReverseUseDefChain(value, condition),
condition);
}
struct InsertSliceOpInterface
: public BufferizableOpInterface::ExternalModel<InsertSliceOpInterface,
tensor::InsertSliceOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return &opOperand == &op->getOpOperand(1) /*dest*/;
}
SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
OpResult opResult) const {
return {&op->getOpOperand(1) /*dest*/};
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return &opOperand == &op->getOpOperand(1) /*dest*/
? op->getResult(0)
: OpResult();
}
BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
return BufferRelation::Equivalent;
}
bool isNotConflicting(Operation *op, OpOperand *uRead,
OpOperand *uConflictingWrite,
const BufferizationAliasInfo &aliasInfo) const {
Operation *readingOp = uRead->getOwner();
Operation *conflictingWritingOp = uConflictingWrite->getOwner();
// Special rules for matching ExtractSliceOp/InsertSliceOp pairs. If
// uRead is an InsertSliceOp...
if (auto insertSliceOp = dyn_cast<InsertSliceOp>(readingOp)) {
// As an example, consider the following IR.
//
// %0 = tensor.extract_slice %t[%a, %b][%c, %d][1, 1] {inplace = [true] }
// %1 = linalg.fill %cst, %0 {inplace= [true] }
// %2 = tensor.insert_slice %1 into %t[%a, %b][%c, %d][1, 1]
// {inplace= [true] }
// TODO: Use insertSliceOp.getDestOpOperand etc. when available.
if (uRead == &insertSliceOp->getOpOperand(1) /*dest*/ &&
hasMatchingExtractSliceOp(aliasInfo, uConflictingWrite->get(),
insertSliceOp))
// Case 1: The main insight is that InsertSliceOp reads only part of
// the destination tensor. The overwritten area is not read. If
// uConflictingWrite writes into exactly the memory location that is
// being read by uRead, this is not a conflict.
//
// In the above example:
// uRead = OpOperand 1 (%t) of tensor.insert_slice
// uConflictingWrite = OpOperand 1 (%0) of linalg.fill
//
// The read of %t does not conflict with the write of the FillOp
// (same aliases!) because the area that the FillOp operates on is
// exactly the one that is *not* read via %t.
return true;
if (uRead == &insertSliceOp->getOpOperand(0) /*source*/ &&
uConflictingWrite == &insertSliceOp->getOpOperand(1) /*dest*/ &&
hasMatchingExtractSliceOp(aliasInfo, uRead->get(), insertSliceOp))
// Case 2: The read of the source tensor and the write to the dest
// tensor via an InsertSliceOp is not a conflict if the read is
// reading exactly that part of an equivalent tensor that the
// InsertSliceOp is writing.
//
// In the above example:
// uRead = OpOperand 0 (%1) of tensor.insert_slice
// uConflictingWrite = OpOperand 1 (%t) of tensor.insert_slice
return true;
}
// If uConflictingWrite is an InsertSliceOp...
if (auto insertSliceOp = dyn_cast<InsertSliceOp>(conflictingWritingOp))
// As an example, consider the following IR.
//
// %0 = tensor.extract_slice %t[%a, %b][%c, %d][1, 1] {inplace = [true] }
// %1 = linalg.fill %cst, %0 {inplace= [true] }
// %2 = tensor.insert_slice %1 into %t[%a, %b][%c, %d][1, 1]
// {inplace= [true] }
// %3 = vector.transfer_read %1, %cst
//
// In the above example:
// uRead = OpOperand 0 (%1) of vector.transfer_read
// uConflictingWrite = OpOperand 1 (%t) of tensor.insert_slice
// lastWrite = %1
//
// This is not a conflict because the InsertSliceOp overwrites the
// memory segment of %1 with the exact same data. (Effectively, there
// is no memory write here.)
if (uConflictingWrite == &insertSliceOp->getOpOperand(1) /*dest*/ &&
aliasInfo.areEquivalentBufferizedValues(uRead->get(),
insertSliceOp.source()) &&
hasMatchingExtractSliceOp(aliasInfo, insertSliceOp.source(),
insertSliceOp))
return true;
return false;
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
// insert_slice ops arise from tiling and bufferizing them out-of-place is
// generally a deal breaker. When used with loops, this ends up cloning the
// whole tensor on every single iteration and is a symptom of a
// catastrophically bad scheduling decision.
// TODO: be very loud about it or even consider failing the pass.
auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
LDBG("bufferize: " << *insertSliceOp << '\n');
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(insertSliceOp);
Location loc = insertSliceOp.getLoc();
// When bufferizing out-of-place, `getResultBuffer` allocates.
Value dstMemref = getResultBuffer(b, insertSliceOp->getResult(0), state);
if (!dstMemref)
return failure();
// A copy of the source buffer is needed if either:
// - The producer of `source` is not inplace. This is the case where a
// slice is computed out of place into the inplace full tensor.
// - The result is not inplace. This is the case where the whole tensor is
// cloned and the clone needs to be updated.
// TODO: Is this necessary?
bool needCopy = !isSourceEquivalentToAMatchingInplaceExtractSliceOp(
state.aliasInfo, insertSliceOp) ||
!state.aliasInfo.isInPlace(insertSliceOp->getResult(0));
if (needCopy) {
LDBG("insert_slice needs extra source copy: " << insertSliceOp.source()
<< " -> copy\n");
// Take a subview of the dst.
auto dstMemrefType = dstMemref.getType().cast<MemRefType>();
auto subviewMemRefType =
memref::SubViewOp::inferRankReducedResultType(
insertSliceOp.getSourceType().getRank(), dstMemrefType,
insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
insertSliceOp.getMixedStrides())
.cast<MemRefType>();
Value subView = b.create<memref::SubViewOp>(
loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(),
insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides());
// Insert new alias.
state.aliasInfo.insertNewBufferAlias(subView, dstMemref);
// Copy tensor.
Value srcMemref = state.lookupBuffer(insertSliceOp.source());
state.allocationFns.memCpyFn(b, insertSliceOp.getLoc(), srcMemref,
subView);
}
state.mapBuffer(insertSliceOp.result(), dstMemref);
return success();
}
};
} // namespace tensor_ext
namespace vector_ext {
struct TransferReadOpInterface
@@ -2484,13 +2031,6 @@ void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
registry.addOpInterface<scf::YieldOp, scf_ext::YieldOpInterface>();
registry.addOpInterface<CallOp, std_ext::CallOpInterface>();
registry.addOpInterface<ReturnOp, std_ext::ReturnOpInterface>();
registry.addOpInterface<tensor::CastOp, tensor_ext::CastOpInterface>();
registry.addOpInterface<tensor::DimOp, tensor_ext::DimOpInterface>();
registry.addOpInterface<tensor::ExtractSliceOp,
tensor_ext::ExtractSliceOpInterface>();
registry.addOpInterface<tensor::ExtractOp, tensor_ext::ExtractOpInterface>();
registry.addOpInterface<tensor::InsertSliceOp,
tensor_ext::InsertSliceOpInterface>();
registry.addOpInterface<vector::TransferReadOp,
vector_ext::TransferReadOpInterface>();
registry.addOpInterface<vector::TransferWriteOp,


@@ -0,0 +1,437 @@
//===- TensorInterfaceImpl.cpp - Tensor Impl. of BufferizableOpInterface --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/Operation.h"
namespace mlir {
namespace linalg {
namespace comprehensive_bufferize {
namespace tensor_ext {
using tensor::ExtractSliceOp;
using tensor::InsertSliceOp;
struct CastOpInterface
: public BufferizableOpInterface::ExternalModel<CastOpInterface,
tensor::CastOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return false;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return false;
}
SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
OpResult opResult) const {
return {&op->getOpOperand(0)};
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return op->getResult(0);
}
BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
return BufferRelation::Equivalent;
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
auto castOp = cast<tensor::CastOp>(op);
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(castOp);
Value resultBuffer = getResultBuffer(b, castOp->getResult(0), state);
if (!resultBuffer)
return failure();
Type sourceType = resultBuffer.getType();
auto rankedMemRefType = sourceType.dyn_cast<MemRefType>();
auto unrankedMemRefType = sourceType.dyn_cast<UnrankedMemRefType>();
assert(rankedMemRefType || unrankedMemRefType);
Attribute memorySpace = rankedMemRefType
? rankedMemRefType.getMemorySpace()
: unrankedMemRefType.getMemorySpace();
TensorType tensorType = castOp.getResult().getType().cast<TensorType>();
MemRefLayoutAttrInterface layout =
rankedMemRefType && tensorType.isa<RankedTensorType>()
? rankedMemRefType.getLayout()
: MemRefLayoutAttrInterface();
Type memRefType = getContiguousOrUnrankedMemRefType(
castOp.getResult().getType(), layout, memorySpace);
Value res =
b.create<memref::CastOp>(castOp.getLoc(), memRefType, resultBuffer);
state.aliasInfo.insertNewBufferEquivalence(res, castOp.getResult());
state.mapBuffer(castOp.getResult(), res);
return success();
}
};
struct DimOpInterface
: public BufferizableOpInterface::ExternalModel<DimOpInterface,
tensor::DimOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return false;
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return OpResult();
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
auto dimOp = cast<tensor::DimOp>(op);
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(dimOp);
if (dimOp.source().getType().isa<RankedTensorType>()) {
Value v = state.lookupBuffer(dimOp.source());
dimOp.result().replaceAllUsesWith(
b.create<memref::DimOp>(dimOp.getLoc(), v, dimOp.index()));
}
return success();
}
};
struct ExtractSliceOpInterface
: public BufferizableOpInterface::ExternalModel<ExtractSliceOpInterface,
tensor::ExtractSliceOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return false;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return false;
}
SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
OpResult opResult) const {
return {&op->getOpOperand(0) /*source*/};
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return &opOperand == &op->getOpOperand(0) /*source*/
? op->getResult(0)
: OpResult();
}
BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
return BufferRelation::None;
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
auto extractSliceOp = cast<tensor::ExtractSliceOp>(op);
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(extractSliceOp);
Location loc = extractSliceOp.getLoc();
Value srcMemref = state.lookupBuffer(extractSliceOp.source());
auto srcMemrefType = srcMemref.getType().cast<MemRefType>();
auto dstTensorType =
extractSliceOp.result().getType().cast<RankedTensorType>();
// If not inplaceable, alloc.
bool inplace = state.aliasInfo.isInPlace(extractSliceOp->getResult(0));
Value alloc;
if (!inplace)
alloc = state.allocationFns.createAllocDeallocFn(
b, loc, extractSliceOp.result(), state);
// Bufferize to subview.
auto subviewMemRefType =
memref::SubViewOp::inferRankReducedResultType(
dstTensorType.getRank(), srcMemrefType,
extractSliceOp.getMixedOffsets(), extractSliceOp.getMixedSizes(),
extractSliceOp.getMixedStrides())
.cast<MemRefType>();
Value subView = b.create<memref::SubViewOp>(
loc, subviewMemRefType, srcMemref, extractSliceOp.getMixedOffsets(),
extractSliceOp.getMixedSizes(), extractSliceOp.getMixedStrides());
// Insert new alias.
state.aliasInfo.insertNewBufferAlias(subView, srcMemref);
// If not inplaceable, copy.
if (!inplace) {
// Do not copy if the copied data is never read.
if (isValueRead(extractSliceOp.result()))
state.allocationFns.memCpyFn(b, extractSliceOp.getLoc(), subView,
alloc);
subView = alloc;
}
state.mapBuffer(extractSliceOp.result(), subView);
return success();
}
};
struct ExtractOpInterface
: public BufferizableOpInterface::ExternalModel<ExtractOpInterface,
tensor::ExtractOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return false;
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return OpResult();
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
auto extractOp = cast<tensor::ExtractOp>(op);
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(extractOp);
Location loc = extractOp.getLoc();
Value srcMemref = state.lookupBuffer(extractOp.tensor());
Value l = b.create<memref::LoadOp>(loc, srcMemref, extractOp.indices());
extractOp.replaceAllUsesWith(l);
return success();
}
};
/// Return true if the (ExtractSliceOp, InsertSliceOp) pair matches (i.e.,
/// equivalent operand/result and the same offsets/sizes/strides
/// specification).
///
/// This is one particular type of relationship between ops on tensors that
/// reduces to an equivalence on buffers. This should be generalized and
/// exposed as interfaces on the proper types.
static bool
areEquivalentExtractSliceOps(const BufferizationAliasInfo &aliasInfo,
ExtractSliceOp st, InsertSliceOp sti) {
if (!st || !sti)
return false;
if (!aliasInfo.areEquivalentBufferizedValues(st.source(), sti.dest()))
return false;
if (!sameOffsetsSizesAndStrides(st, sti, isEqualConstantIntOrValue))
return false;
return true;
}
/// Return true if the source of `insertSliceOp` bufferizes to an
/// equivalent ExtractSliceOp that bufferizes inplace.
static bool isSourceEquivalentToAMatchingInplaceExtractSliceOp(
const BufferizationAliasInfo &aliasInfo, InsertSliceOp insertSliceOp) {
bool foundOp = false;
aliasInfo.applyOnEquivalenceClass(insertSliceOp.source(), [&](Value value) {
auto extractSliceOp = value.getDefiningOp<ExtractSliceOp>();
if (extractSliceOp &&
areEquivalentExtractSliceOps(aliasInfo, extractSliceOp,
insertSliceOp) &&
aliasInfo.isInPlace(extractSliceOp->getResult(0))) {
foundOp = true;
}
});
return foundOp;
}
/// Return true if `value` originates from an ExtractSliceOp that matches
/// the given InsertSliceOp.
static bool hasMatchingExtractSliceOp(const BufferizationAliasInfo &aliasInfo,
Value value, InsertSliceOp insertOp) {
auto condition = [&](Value val) {
if (auto extractOp = val.getDefiningOp<ExtractSliceOp>())
if (areEquivalentExtractSliceOps(aliasInfo, extractOp, insertOp))
return true;
return false;
};
return llvm::all_of(findValueInReverseUseDefChain(value, condition),
condition);
}
struct InsertSliceOpInterface
: public BufferizableOpInterface::ExternalModel<InsertSliceOpInterface,
tensor::InsertSliceOp> {
bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const {
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const {
return &opOperand == &op->getOpOperand(1) /*dest*/;
}
SmallVector<OpOperand *> getAliasingOpOperand(Operation *op,
OpResult opResult) const {
return {&op->getOpOperand(1) /*dest*/};
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const {
return &opOperand == &op->getOpOperand(1) /*dest*/
? op->getResult(0)
: OpResult();
}
BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const {
return BufferRelation::Equivalent;
}
bool isNotConflicting(Operation *op, OpOperand *uRead,
OpOperand *uConflictingWrite,
const BufferizationAliasInfo &aliasInfo) const {
Operation *readingOp = uRead->getOwner();
Operation *conflictingWritingOp = uConflictingWrite->getOwner();
// Special rules for matching ExtractSliceOp/InsertSliceOp pairs. If
// uRead is an InsertSliceOp...
if (auto insertSliceOp = dyn_cast<InsertSliceOp>(readingOp)) {
// As an example, consider the following IR.
//
// %0 = tensor.extract_slice %t[%a, %b][%c, %d][1, 1] {inplace = [true] }
// %1 = linalg.fill %cst, %0 {inplace= [true] }
// %2 = tensor.insert_slice %1 into %t[%a, %b][%c, %d][1, 1]
// {inplace= [true] }
// TODO: Use insertSliceOp.getDestOpOperand etc. when available.
if (uRead == &insertSliceOp->getOpOperand(1) /*dest*/ &&
hasMatchingExtractSliceOp(aliasInfo, uConflictingWrite->get(),
insertSliceOp))
// Case 1: The main insight is that InsertSliceOp reads only part of
// the destination tensor. The overwritten area is not read. If
// uConflictingWrite writes into exactly the memory location that is
// being read by uRead, this is not a conflict.
//
// In the above example:
// uRead = OpOperand 1 (%t) of tensor.insert_slice
// uConflictingWrite = OpOperand 1 (%0) of linalg.fill
//
// The read of %t does not conflict with the write of the FillOp
// (same aliases!) because the area that the FillOp operates on is
// exactly the one that is *not* read via %t.
return true;
if (uRead == &insertSliceOp->getOpOperand(0) /*source*/ &&
uConflictingWrite == &insertSliceOp->getOpOperand(1) /*dest*/ &&
hasMatchingExtractSliceOp(aliasInfo, uRead->get(), insertSliceOp))
// Case 2: The read of the source tensor and the write to the dest
// tensor via an InsertSliceOp is not a conflict if the read is
// reading exactly that part of an equivalent tensor that the
// InsertSliceOp is writing.
//
// In the above example:
// uRead = OpOperand 0 (%1) of tensor.insert_slice
// uConflictingWrite = OpOperand 1 (%t) of tensor.insert_slice
return true;
}
// If uConflictingWrite is an InsertSliceOp...
if (auto insertSliceOp = dyn_cast<InsertSliceOp>(conflictingWritingOp))
// As an example, consider the following IR.
//
// %0 = tensor.extract_slice %t[%a, %b][%c, %d][1, 1] {inplace = [true] }
// %1 = linalg.fill %cst, %0 {inplace= [true] }
// %2 = tensor.insert_slice %1 into %t[%a, %b][%c, %d][1, 1]
// {inplace= [true] }
// %3 = vector.transfer_read %1, %cst
//
// In the above example:
// uRead = OpOperand 0 (%1) of vector.transfer_read
// uConflictingWrite = OpOperand 1 (%t) of tensor.insert_slice
// lastWrite = %1
//
// This is not a conflict because the InsertSliceOp overwrites the
// memory segment of %1 with the exact same data. (Effectively, there
// is no memory write here.)
if (uConflictingWrite == &insertSliceOp->getOpOperand(1) /*dest*/ &&
aliasInfo.areEquivalentBufferizedValues(uRead->get(),
insertSliceOp.source()) &&
hasMatchingExtractSliceOp(aliasInfo, insertSliceOp.source(),
insertSliceOp))
return true;
return false;
}
LogicalResult bufferize(Operation *op, OpBuilder &b,
BufferizationState &state) const {
// insert_slice ops arise from tiling and bufferizing them out-of-place is
// generally a deal breaker. When used with loops, this ends up cloning the
// whole tensor on every single iteration and is a symptom of a
// catastrophically bad scheduling decision.
// TODO: be very loud about it or even consider failing the pass.
auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(insertSliceOp);
Location loc = insertSliceOp.getLoc();
// When bufferizing out-of-place, `getResultBuffer` allocates.
Value dstMemref = getResultBuffer(b, insertSliceOp->getResult(0), state);
if (!dstMemref)
return failure();
// A copy of the source buffer is needed if either:
// - The producer of `source` is not inplace. This is the case where a
// slice is computed out of place into the inplace full tensor.
// - The result is not inplace. This is the case where the whole tensor is
// cloned and the clone needs to be updated.
// TODO: Is this necessary?
bool needCopy = !isSourceEquivalentToAMatchingInplaceExtractSliceOp(
state.aliasInfo, insertSliceOp) ||
!state.aliasInfo.isInPlace(insertSliceOp->getResult(0));
if (needCopy) {
// Take a subview of the dst.
auto dstMemrefType = dstMemref.getType().cast<MemRefType>();
auto subviewMemRefType =
memref::SubViewOp::inferRankReducedResultType(
insertSliceOp.getSourceType().getRank(), dstMemrefType,
insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
insertSliceOp.getMixedStrides())
.cast<MemRefType>();
Value subView = b.create<memref::SubViewOp>(
loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(),
insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides());
// Insert new alias.
state.aliasInfo.insertNewBufferAlias(subView, dstMemref);
// Copy tensor.
Value srcMemref = state.lookupBuffer(insertSliceOp.source());
state.allocationFns.memCpyFn(b, insertSliceOp.getLoc(), srcMemref,
subView);
}
state.mapBuffer(insertSliceOp.result(), dstMemref);
return success();
}
};
} // namespace tensor_ext
} // namespace comprehensive_bufferize
} // namespace linalg
} // namespace mlir
void mlir::linalg::comprehensive_bufferize::tensor_ext::
registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
registry.addOpInterface<tensor::CastOp, tensor_ext::CastOpInterface>();
registry.addOpInterface<tensor::DimOp, tensor_ext::DimOpInterface>();
registry.addOpInterface<tensor::ExtractSliceOp,
tensor_ext::ExtractSliceOpInterface>();
registry.addOpInterface<tensor::ExtractOp, tensor_ext::ExtractOpInterface>();
registry.addOpInterface<tensor::InsertSliceOp,
tensor_ext::InsertSliceOpInterface>();
}


@@ -49,6 +49,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
MLIRStandardOpsTransforms
MLIRStandardToLLVM
MLIRTensor
MLIRTensorBufferizableOpInterfaceImpl
MLIRTransforms
MLIRTransformUtils
MLIRVector


@@ -10,6 +10,7 @@
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h"
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.h"
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
@@ -38,6 +39,7 @@ struct LinalgComprehensiveModuleBufferize
arith::ArithmeticDialect, StandardOpsDialect, AffineDialect>();
registerBufferizableOpInterfaceExternalModels(registry);
linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
tensor_ext::registerBufferizableOpInterfaceExternalModels(registry);
}
};
} // end namespace


@@ -6326,6 +6326,25 @@ cc_library(
],
)
cc_library(
name = "TensorBufferizableOpInterfaceImpl",
srcs = [
"lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp",
],
hdrs = [
"include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h",
],
includes = ["include"],
deps = [
":BufferizableOpInterface",
":IR",
":MemRefDialect",
":Support",
":TensorDialect",
"//llvm:Support",
],
)
td_library(
name = "LinalgDocTdFiles",
srcs = ["include/mlir/Dialect/Linalg/IR/LinalgDoc.td"],
@@ -6545,6 +6564,7 @@ cc_library(
":StandardOps",
":StandardOpsTransforms",
":Support",
":TensorBufferizableOpInterfaceImpl",
":TensorDialect",
":TransformUtils",
":VectorOps",
@@ -6575,7 +6595,6 @@ cc_library(
":SCFDialect",
":StandardOps",
":Support",
":TensorDialect",
":TransformUtils",
":VectorOps",
"//llvm:Support",