[mlir] Move memref.[tensor_load|buffer_cast|clone] to "bufferization" dialect.

RFC: https://llvm.discourse.group/t/rfc-dialect-for-bufferization-related-ops/4712
Differential Revision: https://reviews.llvm.org/D114552

parent 43dc6d5d57
commit 57470abc41
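In short, three ops change their home dialect and, for two of them, their name. A minimal sketch of the before/after spelling, using the assembly formats defined later in this patch (the surrounding values `%m`, `%t`, etc. are hypothetical):

```mlir
// Before: the ops lived in the memref dialect.
//   %t  = memref.tensor_load %m : memref<4xf32>
//   %m2 = memref.buffer_cast %t : memref<4xf32>
//   %c  = memref.clone %m : memref<4xf32> to memref<4xf32>

// After: the same ops in the bufferization dialect.
%t  = bufferization.to_tensor %m : memref<4xf32>
%m2 = bufferization.to_memref %t : memref<4xf32>
%c  = bufferization.clone %m : memref<4xf32> to memref<4xf32>
```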
@@ -236,12 +236,12 @@ func @branch(%arg0: i1) {
   cond_br %arg0, ^bb1, ^bb2
 ^bb1:
   %1 = memref.alloc() : memref<2xf32>
-  %3 = memref.clone %1 : (memref<2xf32>) -> (memref<2xf32>)
+  %3 = bufferization.clone %1 : (memref<2xf32>) -> (memref<2xf32>)
   memref.dealloc %1 : memref<2xf32> // %1 can be safely freed here
   br ^bb3(%3 : memref<2xf32>)
 ^bb2:
   use(%0)
-  %4 = memref.clone %0 : (memref<2xf32>) -> (memref<2xf32>)
+  %4 = bufferization.clone %0 : (memref<2xf32>) -> (memref<2xf32>)
   br ^bb3(%4 : memref<2xf32>)
 ^bb3(%2: memref<2xf32>):
   …
@@ -309,7 +309,7 @@ func @condBranchDynamicTypeNested(
   cond_br %arg0, ^bb1, ^bb2(%arg3 : index)
 ^bb1:
   // temp buffer required due to alias %3
-  %5 = memref.clone %arg1 : (memref<?xf32>) -> (memref<?xf32>)
+  %5 = bufferization.clone %arg1 : (memref<?xf32>) -> (memref<?xf32>)
   br ^bb6(%5 : memref<?xf32>)
 ^bb2(%0: index):
   %1 = memref.alloc(%0) : memref<?xf32>
@@ -320,7 +320,7 @@ func @condBranchDynamicTypeNested(
 ^bb4:
   br ^bb5(%1 : memref<?xf32>)
 ^bb5(%2: memref<?xf32>):
-  %6 = memref.clone %1 : (memref<?xf32>) -> (memref<?xf32>)
+  %6 = bufferization.clone %1 : (memref<?xf32>) -> (memref<?xf32>)
   memref.dealloc %1 : memref<?xf32>
   br ^bb6(%6 : memref<?xf32>)
 ^bb6(%3: memref<?xf32>):
@@ -477,15 +477,15 @@ func @inner_region_control_flow_div(
   %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
   %1 = custom.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>)
     then(%arg2 : memref<?x?xf32>) {
-      %4 = memref.clone %arg2 : (memref<?x?xf32>) -> (memref<?x?xf32>)
+      %4 = bufferization.clone %arg2 : (memref<?x?xf32>) -> (memref<?x?xf32>)
       custom.region_if_yield %4 : memref<?x?xf32>
     } else(%arg3 : memref<?x?xf32>) {
       %2 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
-      %5 = memref.clone %2 : (memref<?x?xf32>) -> (memref<?x?xf32>)
+      %5 = bufferization.clone %2 : (memref<?x?xf32>) -> (memref<?x?xf32>)
       memref.dealloc %2 : memref<?x?xf32>
       custom.region_if_yield %5 : memref<?x?xf32>
     } join(%arg4: memref<?x?xf32>) {
-      %4 = memref.clone %arg4 : (memref<?x?xf32>) -> (memref<?x?xf32>)
+      %4 = bufferization.clone %arg4 : (memref<?x?xf32>) -> (memref<?x?xf32>)
       memref.dealloc %arg4 : memref<?x?xf32>
       custom.region_if_yield %4 : memref<?x?xf32>
     }
@@ -553,21 +553,21 @@ func @loop_nested_if(
   %step: index,
   %buf: memref<2xf32>,
   %res: memref<2xf32>) {
-  %4 = memref.clone %buf : (memref<2xf32>) -> (memref<2xf32>)
+  %4 = bufferization.clone %buf : (memref<2xf32>) -> (memref<2xf32>)
   %0 = scf.for %i = %lb to %ub step %step
     iter_args(%iterBuf = %4) -> memref<2xf32> {
     %1 = arith.cmpi "eq", %i, %ub : index
     %2 = scf.if %1 -> (memref<2xf32>) {
       %3 = memref.alloc() : memref<2xf32> // makes %2 a critical alias
       use(%3)
-      %5 = memref.clone %3 : (memref<2xf32>) -> (memref<2xf32>)
+      %5 = bufferization.clone %3 : (memref<2xf32>) -> (memref<2xf32>)
       memref.dealloc %3 : memref<2xf32>
       scf.yield %5 : memref<2xf32>
     } else {
-      %6 = memref.clone %iterBuf : (memref<2xf32>) -> (memref<2xf32>)
+      %6 = bufferization.clone %iterBuf : (memref<2xf32>) -> (memref<2xf32>)
       scf.yield %6 : memref<2xf32>
     }
-    %7 = memref.clone %2 : (memref<2xf32>) -> (memref<2xf32>)
+    %7 = bufferization.clone %2 : (memref<2xf32>) -> (memref<2xf32>)
     memref.dealloc %2 : memref<2xf32>
     memref.dealloc %iterBuf : memref<2xf32> // free backedge iteration variable
     scf.yield %7 : memref<2xf32>
@@ -626,7 +626,7 @@ and can be removed.
 ```mlir
 func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
   %1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
-  %2 = memref.clone %1 : (memref<?x?xf32>) -> (memref<?x?xf32>)
+  %2 = bufferization.clone %1 : (memref<?x?xf32>) -> (memref<?x?xf32>)
   memref.dealloc %1 : memref<?x?xf32>
   return %2 : memref<?x?xf32>
 }
@@ -667,7 +667,7 @@ func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
     %tmp2 = math.exp %gen2_arg0 : f32
     test.yield %tmp2 : f32
   }: memref<2xf32>, memref<2xf32>
-  %result = memref.clone %temp : (memref<2xf32>) -> (memref<2xf32>)
+  %result = bufferization.clone %temp : (memref<2xf32>) -> (memref<2xf32>)
   memref.dealloc %temp : memref<2xf32>
   return
 }
@@ -693,8 +693,8 @@ func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
 ## Known Limitations

 BufferDeallocation introduces additional clones from the “memref” dialect
-(“memref.clone”). Analogously, all deallocations use the “memref.dealloc”
-operation from the “memref” dialect. The actual copy process is realized using
-“test.copy”. Furthermore, buffers are essentially immutable after their
-creation in a block. Other limitations are known in cases that use
-unstructured control flow.
+(“bufferization.clone”). Analogously, all deallocations use the
+“memref.dealloc” operation from the “memref” dialect. The actual copy process
+is realized using “test.copy”. Furthermore, buffers are essentially immutable
+after their creation in a block. Other limitations are known in cases that use
+unstructured control flow.
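To make the limitation paragraph concrete, here is a hedged sketch of the kind of IR BufferDeallocation now emits (values and types are illustrative, not taken from the patch; the clone is written with the assembly format this patch defines):

```mlir
// A temporary copy is introduced with bufferization.clone and later freed
// with memref.dealloc; the actual data movement is done by test.copy.
%tmp = bufferization.clone %buf : memref<2xf32> to memref<2xf32>
// ... uses of %tmp ...
memref.dealloc %tmp : memref<2xf32>
```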
@@ -191,8 +191,8 @@ One convenient utility provided by the MLIR bufferization infrastructure is the
 `BufferizeTypeConverter`, which comes pre-loaded with the necessary conversions
 and materializations between `tensor` and `memref`.

-In this case, the `MemRefOpsDialect` is marked as legal, so the
-`memref.tensor_load` and `memref.buffer_cast` ops, which are inserted
+In this case, the `BufferizationOpsDialect` is marked as legal, so the
+`bufferization.to_tensor` and `bufferization.to_memref` ops, which are inserted
 automatically by the dialect conversion framework as materializations, are
 legal. There is a helper `populateBufferizeMaterializationLegality`
 ([code](https://github.com/llvm/llvm-project/blob/a0b65a7bcd6065688189b3d678c42ed6af9603db/mlir/include/mlir/Transforms/Bufferize.h#L53))
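To make this concrete, a hedged sketch of the materializations the dialect conversion framework inserts at a bufferization boundary (the consumer op and all values are hypothetical):

```mlir
// %t is a tensor value flowing into an already-bufferized region of the
// program; a target materialization converts it to a memref.
%m = bufferization.to_memref %t : memref<4xf32>
// ... bufferized ops operate on %m ...
// A source materialization converts back so a not-yet-bufferized consumer
// can keep using a tensor value.
%t2 = bufferization.to_tensor %m : memref<4xf32>
"some.consumer"(%t2) : (tensor<4xf32>) -> ()
```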
@@ -252,9 +252,9 @@ from the program.

 The easiest way to write a finalizing bufferize pass is to not write one at all!
 MLIR provides a pass `finalizing-bufferize` which eliminates the
-`memref.tensor_load` / `memref.buffer_cast` materialization ops inserted by
-partial bufferization passes and emits an error if that is not sufficient to
-remove all tensors from the program.
+`bufferization.to_tensor` / `bufferization.to_memref` materialization ops
+inserted by partial bufferization passes and emits an error if that is not
+sufficient to remove all tensors from the program.

 This pass is sufficient when partial bufferization passes have bufferized all
 the ops in the program, leaving behind only the materializations. When possible,
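As a rough, hedged illustration of what `finalizing-bufferize` is expected to eliminate (not taken from the patch; `some.use` is a placeholder op): a back-to-back to_tensor / to_memref pair with matching types simply folds away.

```mlir
// Before: leftover materializations from partial bufferization passes.
%t = bufferization.to_tensor %m : memref<8xf32>
%m2 = bufferization.to_memref %t : memref<8xf32>
"some.use"(%m2) : (memref<8xf32>) -> ()

// After finalizing-bufferize: the pair folds and %m is used directly.
"some.use"(%m) : (memref<8xf32>) -> ()
```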
@@ -272,7 +272,7 @@ downstream projects structured this way. This structure is not recommended in
 new code. A helper, `populateEliminateBufferizeMaterializationsPatterns`
 ([code](https://github.com/llvm/llvm-project/blob/a0b65a7bcd6065688189b3d678c42ed6af9603db/mlir/include/mlir/Transforms/Bufferize.h#L58))
 is available for such passes to provide patterns that eliminate
-`memref.tensor_load` and `memref.buffer_cast`.
+`bufferization.to_tensor` and `bufferization.to_memref`.

 ## Changes since [the talk](#the-talk)
@@ -14,7 +14,8 @@ include "mlir/Pass/PassBase.td"
 def ArithmeticBufferize : FunctionPass<"arith-bufferize"> {
   let summary = "Bufferize Arithmetic dialect ops.";
   let constructor = "mlir::arith::createArithmeticBufferizePass()";
-  let dependentDialects = ["memref::MemRefDialect"];
+  let dependentDialects = ["bufferization::BufferizationDialect",
+                           "memref::MemRefDialect"];
 }

 def ArithmeticExpandOps : FunctionPass<"arith-expand"> {
@ -13,7 +13,6 @@
|
|||
#ifndef MLIR_DIALECT_BUFFERIZATION_IR_ALLOCATIONOPINTERFACE_H_
|
||||
#define MLIR_DIALECT_BUFFERIZATION_IR_ALLOCATIONOPINTERFACE_H_
|
||||
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h.inc"
|
||||
|
|
|
@ -50,10 +50,7 @@ def AllocationOpInterface : OpInterface<"AllocationOpInterface"> {
|
|||
}],
|
||||
"::mlir::Optional<::mlir::Value>", "buildClone",
|
||||
(ins "::mlir::OpBuilder&":$builder, "::mlir::Value":$alloc), [{}],
|
||||
/*defaultImplementation=*/[{
|
||||
return builder.create<memref::CloneOp>(alloc.getLoc(), alloc)
|
||||
.getResult();
|
||||
}]
|
||||
/*defaultImplementation=*/[{ return llvm::None; }]
|
||||
>
|
||||
];
|
||||
}
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
//===- Bufferization.h - Bufferization dialect ------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATION_H_
|
||||
#define MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATION_H_
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Bufferization Dialect
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/BufferizationOpsDialect.h.inc"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Bufferization Dialect Operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
#include "mlir/Dialect/Bufferization/IR/BufferizationOps.h.inc"
|
||||
|
||||
#endif // MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATION_H_
|
|
@ -0,0 +1,31 @@
|
|||
//===- BufferizationBase.td - Bufferization dialect base ---*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef BUFFERIZATION_BASE
|
||||
#define BUFFERIZATION_BASE
|
||||
|
||||
include "mlir/IR/OpBase.td"
|
||||
|
||||
def Bufferization_Dialect : Dialect {
|
||||
let name = "bufferization";
|
||||
let cppNamespace = "::mlir::bufferization";
|
||||
let description = [{
|
||||
Bufferization in MLIR is the process of converting the `tensor` type to the
|
||||
`memref` type.
|
||||
The `bufferization` dialect is intended to collect operations/interfaces
|
||||
specific to the bufferization passes.
|
||||
|
||||
Overview of the bufferization infrastructure and important conceptual
|
||||
details related to using the MLIR dialect conversion infrastructure can be
|
||||
found in [bufferization](Bufferization.md) and [buffer
|
||||
deallocation](BufferDeallocationInternals.md).
|
||||
}];
|
||||
let dependentDialects = ["memref::MemRefDialect", "tensor::TensorDialect"];
|
||||
}
|
||||
|
||||
#endif // BUFFERIZATION_BASE
|
|
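As a hedged illustration of the dialect description above (the `some.*` ops and all values are placeholders, not part of this patch): bufferization rewrites tensor-typed IR into memref-typed IR, with `to_tensor` / `to_memref` bridging not-yet-converted neighbors.

```mlir
// Before bufferization: computation on tensors.
%0 = "some.op"(%t) : (tensor<4xf32>) -> tensor<4xf32>

// After bufferization: the same computation on memrefs, with materializations
// bridging any remaining tensor-typed uses.
%m = bufferization.to_memref %t : memref<4xf32>
%out = memref.alloc() : memref<4xf32>
"some.bufferized_op"(%m, %out) : (memref<4xf32>, memref<4xf32>) -> ()
%1 = bufferization.to_tensor %out : memref<4xf32>
```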
@ -0,0 +1,159 @@
|
|||
//===- BufferizationOps.td - Bufferization op definitions ----------*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef BUFFERIZATION_OPS
|
||||
#define BUFFERIZATION_OPS
|
||||
|
||||
include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.td"
|
||||
include "mlir/Dialect/Bufferization/IR/BufferizationBase.td"
|
||||
include "mlir/Interfaces/SideEffectInterfaces.td"
|
||||
include "mlir/Interfaces/CopyOpInterface.td"
|
||||
|
||||
class Bufferization_Op<string mnemonic, list<OpTrait> traits = []>
|
||||
: Op<Bufferization_Dialect, mnemonic, traits>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CloneOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def Bufferization_CloneOp : Bufferization_Op<"clone", [
|
||||
CopyOpInterface,
|
||||
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
|
||||
DeclareOpInterfaceMethods<AllocationOpInterface, ["buildDealloc", "buildClone"]>
|
||||
]> {
|
||||
let builders = [
|
||||
OpBuilder<(ins "Value":$value), [{
|
||||
return build($_builder, $_state, value.getType(), value);
|
||||
}]>];
|
||||
|
||||
let description = [{
|
||||
Clones the data in the input view into an implicitly defined output view.
|
||||
|
||||
Usage:
|
||||
|
||||
```mlir
|
||||
%arg1 = bufferization.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
```
|
||||
|
||||
Valid implementations of this operation may alias the input and output
|
||||
views or create an actual copy. Mutating the source or result
|
||||
of the clone operation after the clone operation thus leads to undefined
|
||||
behavior.
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<AnyRankedOrUnrankedMemRef, "", []>:$input);
|
||||
let results = (outs Arg<AnyRankedOrUnrankedMemRef, "", []>:$output);
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
Value getSource() { return input(); }
|
||||
Value getTarget() { return output(); }
|
||||
}];
|
||||
|
||||
let assemblyFormat = "$input attr-dict `:` type($input) `to` type($output)";
|
||||
|
||||
let hasFolder = 1;
|
||||
let verifier = ?;
|
||||
let hasCanonicalizer = 1;
|
||||
}
|
||||
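As a hedged illustration of the canonicalization this op participates in (implemented further down as `SimplifyClones`; values, types, and the `"use"` op are illustrative):

```mlir
// Before: the source is deallocated right after being cloned.
%1 = bufferization.clone %0 : memref<?xf32> to memref<?xf32>
memref.dealloc %0 : memref<?xf32>
"use"(%1) : (memref<?xf32>) -> ()

// After: the clone degenerates to a cast and the redundant dealloc is erased.
%1 = memref.cast %0 : memref<?xf32> to memref<?xf32>
"use"(%1) : (memref<?xf32>) -> ()
```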
//===----------------------------------------------------------------------===//
|
||||
// ToTensorOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor",
|
||||
[SameOperandsAndResultShape, SameOperandsAndResultElementType,
|
||||
TypesMatchWith<"result type matches tensor equivalent of 'memref'",
|
||||
"memref", "result",
|
||||
"memref::getTensorTypeFromMemRefType($_self)">]> {
|
||||
let summary = "memref to tensor operation";
|
||||
let description = [{
|
||||
Create a tensor from a memref, making an independent copy of the element
|
||||
data. The result value is a tensor whose shape and element type match the
|
||||
memref operand.
|
||||
|
||||
The opposite of this op is to_memref. Together, these two ops are
|
||||
useful for source/target materializations when doing type conversions
|
||||
involving tensors and memrefs.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
// Produces a value of tensor<4x?xf32> type.
|
||||
%12 = bufferization.to_tensor %10 : memref<4x?xf32, #layout, memspace0>
|
||||
```
|
||||
|
||||
If to_tensor is used in the bufferization steps, mutating the source
buffer after loading leads to undefined behavior.
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<AnyRankedOrUnrankedMemRef,
|
||||
"the reference to load from", [MemRead]>:$memref);
|
||||
let results = (outs AnyTensor:$result);
|
||||
// MemrefToTensor is fully verified by traits.
|
||||
let verifier = ?;
|
||||
|
||||
let builders = [
|
||||
OpBuilder<(ins "Value":$memref), [{
|
||||
$_state.addOperands(memref);
|
||||
$_state.addTypes(memref::getTensorTypeFromMemRefType(memref.getType()));
|
||||
}]>];
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
/// The result of a to_tensor is always a tensor.
|
||||
TensorType getType() {
|
||||
Type resultType = getResult().getType();
|
||||
if (resultType.isa<TensorType>())
|
||||
return resultType.cast<TensorType>();
|
||||
return {};
|
||||
}
|
||||
}];
|
||||
|
||||
let assemblyFormat = "$memref attr-dict `:` type($memref)";
|
||||
|
||||
let hasCanonicalizer = 1;
|
||||
let hasFolder = 1;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ToMemrefOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref",
|
||||
[SameOperandsAndResultShape, SameOperandsAndResultElementType, NoSideEffect,
|
||||
TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'",
|
||||
"memref", "tensor",
|
||||
"memref::getTensorTypeFromMemRefType($_self)">]> {
|
||||
let summary = "tensor to memref cast operation";
|
||||
let description = [{
|
||||
Casts a tensor to a memref.
|
||||
|
||||
```mlir
|
||||
// Result type is tensor<4x?xf32>
|
||||
%12 = bufferization.to_memref %10 : memref<4x?xf32, #map0, 42>
|
||||
```
|
||||
|
||||
Note that mutating the result of the to_memref operation leads to
undefined behavior.
|
||||
|
||||
This operation is a specialized variant of the built-in
|
||||
unrealized_conversion_cast and is intended for use in the context of
|
||||
gradual bufferization.
|
||||
}];
|
||||
|
||||
let arguments = (ins AnyTensor:$tensor);
|
||||
let results = (outs AnyRankedOrUnrankedMemRef:$memref);
|
||||
// This op is fully verified by traits.
|
||||
let verifier = ?;
|
||||
|
||||
let assemblyFormat = "$tensor attr-dict `:` type($memref)";
|
||||
|
||||
let hasFolder = 1;
|
||||
let hasCanonicalizer = 1;
|
||||
}
|
||||
|
||||
#endif // BUFFERIZATION_OPS
|
|
@ -1 +1,3 @@
|
|||
add_mlir_dialect(BufferizationOps bufferization)
|
||||
add_mlir_doc(BufferizationOps BufferizationOps Dialects/ -gen-dialect-doc)
|
||||
add_mlir_interface(AllocationOpInterface)
|
||||
|
|
|
@ -147,8 +147,9 @@ def LinalgBufferize : Pass<"linalg-bufferize", "FuncOp"> {
|
|||
let summary = "Bufferize the linalg dialect";
|
||||
let constructor = "mlir::createLinalgBufferizePass()";
|
||||
let dependentDialects = [
|
||||
"linalg::LinalgDialect",
|
||||
"AffineDialect",
|
||||
"bufferization::BufferizationDialect",
|
||||
"linalg::LinalgDialect",
|
||||
"memref::MemRefDialect",
|
||||
];
|
||||
}
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
#define MLIR_DIALECT_MEMREF_IR_MEMREF_H_
|
||||
|
||||
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
|
||||
#include "mlir/IR/Dialect.h"
|
||||
#include "mlir/Interfaces/CallInterfaces.h"
|
||||
|
@ -32,6 +31,19 @@ raw_ostream &operator<<(raw_ostream &os, const Range &range);
|
|||
/// with `b` at location `loc`.
|
||||
SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op,
|
||||
OpBuilder &b, Location loc);
|
||||
|
||||
namespace memref {
|
||||
|
||||
/// This is a common utility used for patterns of the form
|
||||
/// "someop(memref.cast) -> someop". It folds the source of any memref.cast
|
||||
/// into the root operation directly.
|
||||
LogicalResult foldMemRefCast(Operation *op, Value inner = nullptr);
|
||||
|
||||
/// Return an unranked/ranked tensor type for the given unranked/ranked memref
|
||||
/// type.
|
||||
Type getTensorTypeFromMemRefType(Type type);
|
||||
|
||||
} // namespace memref
|
||||
} // namespace mlir
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -19,7 +19,7 @@ def MemRef_Dialect : Dialect {
|
|||
manipulation ops, which are not strongly associated with any particular
|
||||
other dialect or domain abstraction.
|
||||
}];
|
||||
let dependentDialects = ["arith::ArithmeticDialect", "tensor::TensorDialect"];
|
||||
let dependentDialects = ["arith::ArithmeticDialect"];
|
||||
let hasConstantMaterializer = 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -285,43 +285,6 @@ def MemRef_AllocaScopeReturnOp : MemRef_Op<"alloca_scope.return",
|
|||
let verifier = ?;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// BufferCastOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def MemRef_BufferCastOp : MemRef_Op<"buffer_cast",
|
||||
[SameOperandsAndResultShape, SameOperandsAndResultElementType, NoSideEffect,
|
||||
TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'",
|
||||
"memref", "tensor",
|
||||
"getTensorTypeFromMemRefType($_self)">]> {
|
||||
let summary = "tensor to memref cast operation";
|
||||
let description = [{
|
||||
Casts a tensor to a memref.
|
||||
|
||||
```mlir
|
||||
// Result type is tensor<4x?xf32>
|
||||
%12 = memref.buffer_cast %10 : memref<4x?xf32, #map0, 42>
|
||||
```
|
||||
|
||||
Note, that mutating the result of the buffer cast operation leads to
|
||||
undefined behavior.
|
||||
|
||||
This operation is a specialized variant of the built-in
|
||||
unrealized_conversion_cast and is intended for use in the context of
|
||||
gradual bufferization.
|
||||
}];
|
||||
|
||||
let arguments = (ins AnyTensor:$tensor);
|
||||
let results = (outs AnyRankedOrUnrankedMemRef:$memref);
|
||||
// This op is fully verified by traits.
|
||||
let verifier = ?;
|
||||
|
||||
let assemblyFormat = "$tensor attr-dict `:` type($memref)";
|
||||
|
||||
let hasFolder = 1;
|
||||
let hasCanonicalizer = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CastOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -408,49 +371,6 @@ def MemRef_CastOp : MemRef_Op<"cast", [
|
|||
let hasFolder = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CloneOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CloneOp : MemRef_Op<"clone", [
|
||||
CopyOpInterface,
|
||||
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
|
||||
]> {
|
||||
let builders = [
|
||||
OpBuilder<(ins "Value":$value), [{
|
||||
return build($_builder, $_state, value.getType(), value);
|
||||
}]>];
|
||||
|
||||
let description = [{
|
||||
Clones the data in the input view into an implicitly defined output view.
|
||||
|
||||
Usage:
|
||||
|
||||
```mlir
|
||||
%arg1 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
```
|
||||
|
||||
Valid implementations of this operation may alias the input and output
|
||||
views or create an actual copy. Mutating the source or result
|
||||
of the clone operation after the clone operation thus leads to undefined
|
||||
behavior.
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<AnyRankedOrUnrankedMemRef, "", []>:$input);
|
||||
let results = (outs Arg<AnyRankedOrUnrankedMemRef, "", []>:$output);
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
Value getSource() { return input(); }
|
||||
Value getTarget() { return output(); }
|
||||
}];
|
||||
|
||||
let assemblyFormat = "$input attr-dict `:` type($input) `to` type($output)";
|
||||
|
||||
let hasFolder = 1;
|
||||
let verifier = ?;
|
||||
let hasCanonicalizer = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CopyOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -940,7 +860,6 @@ def LoadOp : MemRef_Op<"load",
|
|||
operand_range getIndices() { return {operand_begin() + 1, operand_end()}; }
|
||||
}];
|
||||
|
||||
let hasCanonicalizer = 1;
|
||||
let hasFolder = 1;
|
||||
|
||||
let assemblyFormat = "$memref `[` $indices `]` attr-dict `:` type($memref)";
|
||||
|
@ -1593,64 +1512,6 @@ def SubViewOp : BaseOpWithOffsetSizesAndStrides<
|
|||
let hasFolder = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TensorLoadOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def TensorLoadOp : MemRef_Op<"tensor_load",
|
||||
[SameOperandsAndResultShape, SameOperandsAndResultElementType,
|
||||
TypesMatchWith<"result type matches tensor equivalent of 'memref'",
|
||||
"memref", "result",
|
||||
"getTensorTypeFromMemRefType($_self)">]> {
|
||||
let summary = "tensor load operation";
|
||||
let description = [{
|
||||
Create a tensor from a memref, making an independent copy of the element
|
||||
data. The result value is a tensor whose shape and element type match the
|
||||
memref operand.
|
||||
|
||||
The opposite of this op is buffer_cast. Together, these two ops are
|
||||
useful for source/target materializations when doing type conversions
|
||||
involving tensors and memrefs.
|
||||
|
||||
Example:
|
||||
|
||||
```mlir
|
||||
// Produces a value of tensor<4x?xf32> type.
|
||||
%12 = memref.tensor_load %10 : memref<4x?xf32, #layout, memspace0>
|
||||
```
|
||||
|
||||
If tensor load is used in the bufferization steps, mutating the source
|
||||
buffer after loading leads to undefined behavior.
|
||||
}];
|
||||
|
||||
let arguments = (ins Arg<AnyRankedOrUnrankedMemRef,
|
||||
"the reference to load from", [MemRead]>:$memref);
|
||||
let results = (outs AnyTensor:$result);
|
||||
// TensorLoadOp is fully verified by traits.
|
||||
let verifier = ?;
|
||||
|
||||
let builders = [
|
||||
OpBuilder<(ins "Value":$memref), [{
|
||||
$_state.addOperands(memref);
|
||||
$_state.addTypes(getTensorTypeFromMemRefType(memref.getType()));
|
||||
}]>];
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
/// The result of a tensor_load is always a tensor.
|
||||
TensorType getType() {
|
||||
Type resultType = getResult().getType();
|
||||
if (resultType.isa<TensorType>())
|
||||
return resultType.cast<TensorType>();
|
||||
return {};
|
||||
}
|
||||
}];
|
||||
|
||||
let assemblyFormat = "$memref attr-dict `:` type($memref)";
|
||||
|
||||
let hasCanonicalizer = 1;
|
||||
let hasFolder = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TensorStoreOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -14,7 +14,8 @@ include "mlir/Pass/PassBase.td"
|
|||
def SCFBufferize : FunctionPass<"scf-bufferize"> {
|
||||
let summary = "Bufferize the scf dialect.";
|
||||
let constructor = "mlir::createSCFBufferizePass()";
|
||||
let dependentDialects = ["memref::MemRefDialect"];
|
||||
let dependentDialects = ["bufferization::BufferizationDialect",
|
||||
"memref::MemRefDialect"];
|
||||
}
|
||||
|
||||
// Note: Making these canonicalization patterns would require a dependency
|
||||
|
|
|
@ -25,6 +25,7 @@ def ShapeToShapeLowering : FunctionPass<"shape-to-shape-lowering"> {
|
|||
def ShapeBufferize : FunctionPass<"shape-bufferize"> {
|
||||
let summary = "Bufferize the shape dialect.";
|
||||
let constructor = "mlir::createShapeBufferizePass()";
|
||||
let dependentDialects = ["memref::MemRefDialect"];
|
||||
let dependentDialects = ["bufferization::BufferizationDialect",
|
||||
"memref::MemRefDialect"];
|
||||
}
|
||||
#endif // MLIR_DIALECT_SHAPE_TRANSFORMS_PASSES
|
||||
|
|
|
@ -55,6 +55,7 @@ def Sparsification : Pass<"sparsification", "ModuleOp"> {
|
|||
let dependentDialects = [
|
||||
"AffineDialect",
|
||||
"arith::ArithmeticDialect",
|
||||
"bufferization::BufferizationDialect",
|
||||
"LLVM::LLVMDialect",
|
||||
"memref::MemRefDialect",
|
||||
"scf::SCFDialect",
|
||||
|
@ -105,6 +106,7 @@ def SparseTensorConversion : Pass<"sparse-tensor-conversion", "ModuleOp"> {
|
|||
let constructor = "mlir::createSparseTensorConversionPass()";
|
||||
let dependentDialects = [
|
||||
"arith::ArithmeticDialect",
|
||||
"bufferization::BufferizationDialect",
|
||||
"LLVM::LLVMDialect",
|
||||
"linalg::LinalgDialect",
|
||||
"memref::MemRefDialect",
|
||||
|
|
|
@ -14,7 +14,8 @@ include "mlir/Pass/PassBase.td"
|
|||
def StdBufferize : FunctionPass<"std-bufferize"> {
|
||||
let summary = "Bufferize the std dialect";
|
||||
let constructor = "mlir::createStdBufferizePass()";
|
||||
let dependentDialects = ["memref::MemRefDialect", "scf::SCFDialect"];
|
||||
let dependentDialects = ["bufferization::BufferizationDialect",
|
||||
"memref::MemRefDialect", "scf::SCFDialect"];
|
||||
}
|
||||
|
||||
def StdExpandOps : FunctionPass<"std-expand"> {
|
||||
|
@ -47,7 +48,8 @@ def FuncBufferize : Pass<"func-bufferize", "ModuleOp"> {
|
|||
whether they need rewriting.
|
||||
}];
|
||||
let constructor = "mlir::createFuncBufferizePass()";
|
||||
let dependentDialects = ["memref::MemRefDialect"];
|
||||
let dependentDialects = ["bufferization::BufferizationDialect",
|
||||
"memref::MemRefDialect"];
|
||||
}
|
||||
|
||||
def TensorConstantBufferize : Pass<"tensor-constant-bufferize", "ModuleOp"> {
|
||||
|
@ -61,7 +63,8 @@ def TensorConstantBufferize : Pass<"tensor-constant-bufferize", "ModuleOp"> {
|
|||
function granularity.
|
||||
}];
|
||||
let constructor = "mlir::createTensorConstantBufferizePass()";
|
||||
let dependentDialects = ["memref::MemRefDialect"];
|
||||
let dependentDialects = ["bufferization::BufferizationDialect",
|
||||
"memref::MemRefDialect"];
|
||||
let options = [
|
||||
Option<"alignment", "alignment", "unsigned", /*default=*/"0",
|
||||
"Create global memrefs with a specified alignment">,
|
||||
|
|
|
@ -14,7 +14,11 @@ include "mlir/Pass/PassBase.td"
|
|||
def TensorBufferize : FunctionPass<"tensor-bufferize"> {
|
||||
let summary = "Bufferize the `tensor` dialect";
|
||||
let constructor = "mlir::createTensorBufferizePass()";
|
||||
let dependentDialects = ["scf::SCFDialect", "memref::MemRefDialect"];
|
||||
let dependentDialects = [
|
||||
"bufferization::BufferizationDialect",
|
||||
"memref::MemRefDialect",
|
||||
"scf::SCFDialect"
|
||||
];
|
||||
}
|
||||
|
||||
#endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "mlir/Dialect/ArmNeon/ArmNeonDialect.h"
|
||||
#include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
|
||||
#include "mlir/Dialect/Async/IR/Async.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/Complex/IR/Complex.h"
|
||||
#include "mlir/Dialect/DLTI/DLTI.h"
|
||||
#include "mlir/Dialect/EmitC/IR/EmitC.h"
|
||||
|
@ -57,6 +58,7 @@ inline void registerAllDialects(DialectRegistry ®istry) {
|
|||
amx::AMXDialect,
|
||||
arm_neon::ArmNeonDialect,
|
||||
async::AsyncDialect,
|
||||
bufferization::BufferizationDialect,
|
||||
complex::ComplexDialect,
|
||||
DLTIDialect,
|
||||
emitc::EmitCDialect,
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/IR/BlockAndValueMapping.h"
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
#include "mlir/IR/IntegerSet.h"
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "mlir/Transforms/Bufferize.h"
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/Arithmetic/Transforms/Passes.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
|
||||
using namespace mlir;
|
||||
|
|
|
@ -15,6 +15,10 @@ namespace mlir {
|
|||
|
||||
class StandardOpsDialect;
|
||||
|
||||
namespace bufferization {
|
||||
class BufferizationDialect;
|
||||
} // end namespace bufferization
|
||||
|
||||
namespace memref {
|
||||
class MemRefDialect;
|
||||
} // end namespace memref
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::bufferization;
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/BufferizationOpsDialect.cpp.inc"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Bufferization Dialect
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
void mlir::bufferization::BufferizationDialect::initialize() {
|
||||
addOperations<
|
||||
#define GET_OP_LIST
|
||||
#include "mlir/Dialect/Bufferization/IR/BufferizationOps.cpp.inc"
|
||||
>();
|
||||
}
|
|
@ -0,0 +1,305 @@
|
|||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::bufferization;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CloneOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
void CloneOp::getEffects(
|
||||
SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
|
||||
&effects) {
|
||||
effects.emplace_back(MemoryEffects::Read::get(), input(),
|
||||
SideEffects::DefaultResource::get());
|
||||
effects.emplace_back(MemoryEffects::Write::get(), output(),
|
||||
SideEffects::DefaultResource::get());
|
||||
effects.emplace_back(MemoryEffects::Allocate::get(), output(),
|
||||
SideEffects::DefaultResource::get());
|
||||
}
|
||||
|
||||
OpFoldResult CloneOp::fold(ArrayRef<Attribute> operands) {
|
||||
return succeeded(memref::foldMemRefCast(*this)) ? getResult() : Value();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/// Merge the clone and its source (by converting the clone to a cast) when
|
||||
/// possible.
|
||||
struct SimplifyClones : public OpRewritePattern<CloneOp> {
|
||||
using OpRewritePattern<CloneOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(CloneOp cloneOp,
|
||||
PatternRewriter &rewriter) const override {
|
||||
if (cloneOp.use_empty()) {
|
||||
rewriter.eraseOp(cloneOp);
|
||||
return success();
|
||||
}
|
||||
|
||||
Value source = cloneOp.input();
|
||||
|
||||
// This only finds dealloc operations for the immediate value. It should
|
||||
// also consider aliases. That would also make the safety check below
|
||||
// redundant.
|
||||
llvm::Optional<Operation *> maybeCloneDeallocOp =
|
||||
findDealloc(cloneOp.output());
|
||||
// Skip if either of them has more than one deallocate operation.
|
||||
if (!maybeCloneDeallocOp.hasValue())
|
||||
return failure();
|
||||
llvm::Optional<Operation *> maybeSourceDeallocOp = findDealloc(source);
|
||||
if (!maybeSourceDeallocOp.hasValue())
|
||||
return failure();
|
||||
Operation *cloneDeallocOp = *maybeCloneDeallocOp;
|
||||
Operation *sourceDeallocOp = *maybeSourceDeallocOp;
|
||||
|
||||
// If both are deallocated in the same block, their in-block lifetimes
|
||||
// might not fully overlap, so we cannot decide which one to drop.
|
||||
if (cloneDeallocOp && sourceDeallocOp &&
|
||||
cloneDeallocOp->getBlock() == sourceDeallocOp->getBlock())
|
||||
return failure();
|
||||
|
||||
Block *currentBlock = cloneOp->getBlock();
|
||||
Operation *redundantDealloc = nullptr;
|
||||
if (cloneDeallocOp && cloneDeallocOp->getBlock() == currentBlock) {
|
||||
redundantDealloc = cloneDeallocOp;
|
||||
} else if (sourceDeallocOp && sourceDeallocOp->getBlock() == currentBlock) {
|
||||
redundantDealloc = sourceDeallocOp;
|
||||
}
|
||||
|
||||
if (!redundantDealloc)
|
||||
return failure();
|
||||
|
||||
// Safety check that there are no other deallocations in between
|
||||
// cloneOp and redundantDealloc, as otherwise we might deallocate an alias
|
||||
// of source before the uses of the clone. With alias information, we could
|
||||
// restrict this to only fail if the dealloc's operand is an alias
|
||||
// of the source.
|
||||
for (Operation *pos = cloneOp->getNextNode(); pos != redundantDealloc;
|
||||
pos = pos->getNextNode()) {
|
||||
auto effectInterface = dyn_cast<MemoryEffectOpInterface>(pos);
|
||||
if (!effectInterface)
|
||||
continue;
|
||||
if (effectInterface.hasEffect<MemoryEffects::Free>())
|
||||
return failure();
|
||||
}
|
||||
|
||||
rewriter.replaceOpWithNewOp<memref::CastOp>(cloneOp, cloneOp.getType(),
|
||||
source);
|
||||
rewriter.eraseOp(redundantDealloc);
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace.
|
||||
|
||||
void CloneOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
|
||||
MLIRContext *context) {
|
||||
results.insert<SimplifyClones>(context);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ToTensorOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
OpFoldResult ToTensorOp::fold(ArrayRef<Attribute>) {
|
||||
if (auto toMemref = memref().getDefiningOp<ToMemrefOp>())
|
||||
// Approximate alias analysis by conservatively folding only when there
// is no interleaved operation.
|
||||
if (toMemref->getBlock() == this->getOperation()->getBlock() &&
|
||||
toMemref->getNextNode() == this->getOperation())
|
||||
return toMemref.tensor();
|
||||
return {};
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
struct DimOfToTensorFolder : public OpRewritePattern<tensor::DimOp> {
|
||||
using OpRewritePattern<tensor::DimOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(tensor::DimOp dimOp,
|
||||
PatternRewriter &rewriter) const override {
|
||||
auto memrefToTensorOp = dimOp.source().getDefiningOp<ToTensorOp>();
|
||||
if (!memrefToTensorOp)
|
||||
return failure();
|
||||
|
||||
rewriter.replaceOpWithNewOp<memref::DimOp>(dimOp, memrefToTensorOp.memref(),
|
||||
dimOp.index());
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void ToTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
MLIRContext *context) {
|
||||
results.add<DimOfToTensorFolder>(context);
|
||||
}
|
||||
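A hedged sketch of the `DimOfToTensorFolder` pattern registered above (values are illustrative):

```mlir
// Before:
%t = bufferization.to_tensor %m : memref<?xf32>
%d = tensor.dim %t, %c0 : tensor<?xf32>

// After canonicalization: the dim is taken directly on the underlying memref.
%d = memref.dim %m, %c0 : memref<?xf32>
```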
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ToMemrefOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
OpFoldResult ToMemrefOp::fold(ArrayRef<Attribute>) {
|
||||
if (auto memrefToTensor = tensor().getDefiningOp<ToTensorOp>())
|
||||
if (memrefToTensor.memref().getType() == getType())
|
||||
return memrefToTensor.memref();
|
||||
return {};
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/// Replace tensor.cast + to_memref by to_memref + memref.cast.
|
||||
struct ToMemrefOfCast : public OpRewritePattern<ToMemrefOp> {
|
||||
using OpRewritePattern<ToMemrefOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(ToMemrefOp toMemref,
|
||||
PatternRewriter &rewriter) const final {
|
||||
auto tensorCastOperand =
|
||||
toMemref.getOperand().getDefiningOp<tensor::CastOp>();
|
||||
if (!tensorCastOperand)
|
||||
return failure();
|
||||
auto srcTensorType =
|
||||
tensorCastOperand.getOperand().getType().dyn_cast<RankedTensorType>();
|
||||
if (!srcTensorType)
|
||||
return failure();
|
||||
auto memrefType = MemRefType::get(srcTensorType.getShape(),
|
||||
srcTensorType.getElementType());
|
||||
Value memref = rewriter.create<ToMemrefOp>(toMemref.getLoc(), memrefType,
|
||||
tensorCastOperand.getOperand());
|
||||
rewriter.replaceOpWithNewOp<memref::CastOp>(toMemref, toMemref.getType(),
|
||||
memref);
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
/// Canonicalize bufferization.to_tensor + bufferization.to_memref to
|
||||
/// memref.cast when a type mismatch prevents `ToMemrefOp::fold` from kicking in.
|
||||
struct TensorLoadToMemref : public OpRewritePattern<ToMemrefOp> {
|
||||
using OpRewritePattern<ToMemrefOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(ToMemrefOp toMemref,
|
||||
PatternRewriter &rewriter) const final {
|
||||
auto memrefToTensor = toMemref.tensor().getDefiningOp<ToTensorOp>();
|
||||
// Bail unless we have a to_tensor + to_memref pair with different
|
||||
// types. `ToMemrefOp::fold` handles the same type case.
|
||||
if (!memrefToTensor ||
|
||||
memrefToTensor.memref().getType() == toMemref.getType())
|
||||
return failure();
|
||||
// If types are definitely not cast-compatible, bail.
|
||||
if (!memref::CastOp::areCastCompatible(memrefToTensor.memref().getType(),
|
||||
toMemref.getType()))
|
||||
return failure();
|
||||
|
||||
// We already know that the types are potentially cast-compatible. However
|
||||
// in case the affine maps are different, we may need to use a copy if we go
|
||||
// from dynamic to static offset or stride (the canonicalization cannot know
|
||||
// at this point that it is really cast compatible).
|
||||
auto isGuaranteedCastCompatible = [](MemRefType source, MemRefType target) {
|
||||
int64_t sourceOffset, targetOffset;
|
||||
SmallVector<int64_t, 4> sourceStrides, targetStrides;
|
||||
if (failed(getStridesAndOffset(source, sourceStrides, sourceOffset)) ||
|
||||
failed(getStridesAndOffset(target, targetStrides, targetOffset)))
|
||||
return false;
|
||||
auto dynamicToStatic = [](int64_t a, int64_t b) {
|
||||
return a == MemRefType::getDynamicStrideOrOffset() &&
|
||||
b != MemRefType::getDynamicStrideOrOffset();
|
||||
};
|
||||
if (dynamicToStatic(sourceOffset, targetOffset))
|
||||
return false;
|
||||
for (auto it : zip(sourceStrides, targetStrides))
|
||||
if (dynamicToStatic(std::get<0>(it), std::get<1>(it)))
|
||||
return false;
|
||||
return true;
|
||||
};
|
||||
|
||||
auto memrefToTensorType =
|
||||
memrefToTensor.memref().getType().dyn_cast<MemRefType>();
|
||||
auto toMemrefType = toMemref.getType().dyn_cast<MemRefType>();
|
||||
if (memrefToTensorType && toMemrefType &&
|
||||
!isGuaranteedCastCompatible(memrefToTensorType, toMemrefType)) {
|
||||
MemRefType resultType = toMemrefType;
|
||||
auto loc = toMemref.getLoc();
|
||||
SmallVector<Value, 4> dynamicOperands;
|
||||
for (int i = 0; i < resultType.getRank(); ++i) {
|
||||
if (resultType.getShape()[i] != ShapedType::kDynamicSize)
|
||||
continue;
|
||||
auto index = rewriter.createOrFold<arith::ConstantIndexOp>(loc, i);
|
||||
Value size = rewriter.create<tensor::DimOp>(loc, memrefToTensor, index);
|
||||
dynamicOperands.push_back(size);
|
||||
}
|
||||
auto copy =
|
||||
rewriter.create<memref::AllocOp>(loc, resultType, dynamicOperands);
|
||||
rewriter.create<memref::CopyOp>(loc, memrefToTensor.memref(), copy);
|
||||
rewriter.replaceOp(toMemref, {copy});
|
||||
} else
|
||||
rewriter.replaceOpWithNewOp<memref::CastOp>(toMemref, toMemref.getType(),
|
||||
memrefToTensor.memref());
|
||||
return success();
|
||||
}
|
||||
};
|
||||
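A hedged sketch of what `TensorLoadToMemref` rewrites when the two memref types differ but are guaranteed cast-compatible (types are illustrative):

```mlir
// Before: a to_tensor / to_memref round trip with different memref types.
%t = bufferization.to_tensor %m : memref<4xf32>
%m2 = bufferization.to_memref %t : memref<?xf32>

// After canonicalization: the round trip collapses into a cast.
%m2 = memref.cast %m : memref<4xf32> to memref<?xf32>
```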
|
||||
/// Fold a load on a to_memref operation into a tensor.extract on the
|
||||
/// corresponding tensor.
|
||||
struct LoadOfToMemref : public OpRewritePattern<memref::LoadOp> {
|
||||
using OpRewritePattern<memref::LoadOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(memref::LoadOp load,
|
||||
PatternRewriter &rewriter) const override {
|
||||
auto toMemref = load.memref().getDefiningOp<ToMemrefOp>();
|
||||
if (!toMemref)
|
||||
return failure();
|
||||
|
||||
rewriter.replaceOpWithNewOp<tensor::ExtractOp>(load, toMemref.tensor(),
|
||||
load.indices());
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
/// Fold dim of a to_memref into the dim of the tensor.
|
||||
struct DimOfCastOp : public OpRewritePattern<memref::DimOp> {
|
||||
using OpRewritePattern<memref::DimOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(memref::DimOp dimOp,
|
||||
PatternRewriter &rewriter) const override {
|
||||
auto castOp = dimOp.source().getDefiningOp<ToMemrefOp>();
|
||||
if (!castOp)
|
||||
return failure();
|
||||
Value newSource = castOp.getOperand();
|
||||
rewriter.replaceOpWithNewOp<tensor::DimOp>(dimOp, newSource, dimOp.index());
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void ToMemrefOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
MLIRContext *context) {
|
||||
results.add<DimOfCastOp, LoadOfToMemref, ToMemrefOfCast, TensorLoadToMemref>(
|
||||
context);
|
||||
}
|
||||
|
||||
Optional<Operation *> CloneOp::buildDealloc(OpBuilder &builder, Value alloc) {
|
||||
return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
|
||||
.getOperation();
|
||||
}
|
||||
|
||||
Optional<Value> CloneOp::buildClone(OpBuilder &builder, Value alloc) {
|
||||
return builder.create<CloneOp>(alloc.getLoc(), alloc).getResult();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TableGen'd op method definitions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
#include "mlir/Dialect/Bufferization/IR/BufferizationOps.cpp.inc"
|
|
@ -1,12 +1,18 @@
|
|||
add_mlir_library(MLIRAllocationOpInterface
|
||||
add_mlir_dialect_library(MLIRBufferization
|
||||
AllocationOpInterface.cpp
|
||||
BufferizationOps.cpp
|
||||
BufferizationDialect.cpp
|
||||
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization/IR
|
||||
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization
|
||||
|
||||
DEPENDS
|
||||
MLIRAllocationOpInterfaceIncGen
|
||||
MLIRBufferizationOpsIncGen
|
||||
|
||||
LINK_LIBS PUBLIC
|
||||
MLIRDialect
|
||||
MLIRIR
|
||||
MLIRTensor
|
||||
MLIRMemRef
|
||||
)
|
||||
|
|
|
@ -11,8 +11,8 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
|
||||
|
@ -49,7 +49,7 @@ Value Aliases::find(Value v) {
|
|||
// the aliasing further.
|
||||
if (isa<RegionBranchOpInterface>(defOp))
|
||||
return v;
|
||||
if (isa<memref::BufferCastOp>(defOp))
|
||||
if (isa<bufferization::ToMemrefOp>(defOp))
|
||||
return v;
|
||||
|
||||
if (auto memEffect = dyn_cast<MemoryEffectOpInterface>(defOp)) {
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/IR/AsmState.h"
|
||||
#include "mlir/IR/BlockAndValueMapping.h"
|
||||
|
@ -415,8 +416,8 @@ mlir::linalg::comprehensive_bufferize::bufferize(Operation *op,
|
|||
BufferizationState &state) {
|
||||
OpBuilder b(op->getContext());
|
||||
|
||||
// Skip BufferCast and TensorLoad ops.
|
||||
if (isa<memref::BufferCastOp, memref::TensorLoadOp>(op))
|
||||
// Skip ToMemrefOp and ToTensorOp.
|
||||
if (isa<bufferization::ToMemrefOp, bufferization::ToTensorOp>(op))
|
||||
return success();
|
||||
|
||||
// Check if op has tensor results or operands.
|
||||
|
|
|
@ -16,6 +16,7 @@ add_mlir_dialect_library(MLIRBufferizableOpInterface
|
|||
|
||||
LINK_LIBS PUBLIC
|
||||
MLIRIR
|
||||
MLIRBufferization
|
||||
MLIRMemRef
|
||||
)
|
||||
|
||||
|
|
|
@ -32,9 +32,9 @@
|
|||
// #map = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
|
||||
// func @foo(%A: tensor<?xf32> {linalg.inplaceable = true})
|
||||
// -> tensor<?xf32> {
|
||||
// %0 = memref.buffer_cast %A : memref<?xf32, #map>
|
||||
// %0 = bufferization.to_memref %A : memref<?xf32, #map>
|
||||
// // ... uses of %0
|
||||
// %res = memref.tensor_load %0 : memref<?xf32, #map>
|
||||
// %res = bufferization.to_tensor %0 : memref<?xf32, #map>
|
||||
// return %res : tensor<?xf32>
|
||||
// }
|
||||
// ```
|
||||
|
@ -57,13 +57,13 @@
|
|||
// #map = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
|
||||
// func @foo(%A: tensor<?xf32> {linalg.inplaceable = true})
|
||||
// -> tensor<?xf32> {
|
||||
// %0 = memref.buffer_cast %A : memref<?xf32, #map>
|
||||
// %0 = bufferization.to_memref %A : memref<?xf32, #map>
|
||||
// %1 = memref.dim %0, %c0 : memref<?xf32, #map>
|
||||
// %2 = memref.alloc(%1) : memref<?xf32>
|
||||
// %3 = memref.cast %2 : memref<?xf32> to memref<?xf32, #map>
|
||||
// // ... uses of %3
|
||||
// memref.dealloc %2 : memref<?xf32, #map>
|
||||
// %res = memref.tensor_load %3 : memref<?xf32, #map>
|
||||
// %res = bufferization.to_tensor %3 : memref<?xf32, #map>
|
||||
// return %res : tensor<?xf32>
|
||||
// }
|
||||
// ```
|
||||
|
@ -87,11 +87,11 @@
|
|||
// #map = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
|
||||
// func @foo(%arg0: tensor<?xf32> {linalg.inplaceable = true})
|
||||
// -> tensor<4xf32> {
|
||||
// %0 = memref.buffer_cast %arg0 : memref<?xf32, #map>
|
||||
// %0 = bufferization.to_memref %arg0 : memref<?xf32, #map>
|
||||
// %1 = memref.subview %0[0] [4] [1] : memref<?xf32, #map> to
|
||||
// memref<4xf32, #map>
|
||||
// // ... inplace computes into %1
|
||||
// %3 = memref.tensor_load %1 : memref<4xf32, #map>
|
||||
// %3 = bufferization.to_tensor %1 : memref<4xf32, #map>
|
||||
// return %3 : tensor<4xf32>
|
||||
// }
|
||||
// ```
|
||||
|
@ -110,6 +110,7 @@
|
|||
#include <random>
|
||||
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Utils/StaticValueUtils.h"
|
||||
|
@ -648,7 +649,7 @@ static LogicalResult bufferizeFuncOp(FuncOp funcOp, BufferizationState &state) {
|
|||
? getDynamicMemRefType(rankedTensorType)
|
||||
: getContiguousOrUnrankedMemRefType(tensorType);
|
||||
Value bufferCast =
|
||||
b.create<memref::BufferCastOp>(funcOp.getLoc(), memRefType, bbArg);
|
||||
b.create<bufferization::ToMemrefOp>(funcOp.getLoc(), memRefType, bbArg);
|
||||
state.aliasInfo.insertNewBufferEquivalence(bufferCast, bbArg);
|
||||
state.mapBuffer(bbArg, bufferCast);
|
||||
}
|
||||
|
@ -932,22 +933,23 @@ static LogicalResult bufferizeFuncOpBoundary(
|
|||
Value memref = frontBlock.addArgument(memrefType);
|
||||
OpBuilder b(funcOp->getContext());
|
||||
b.setInsertionPointToStart(&frontBlock);
|
||||
// Replace all uses of bbArg through a BufferCastOp by a memref::CastOp.
|
||||
// Replace all uses of bbArg through a ToMemrefOp by a memref::CastOp.
|
||||
for (auto &use : llvm::make_early_inc_range(bbArg.getUses())) {
|
||||
if (auto bufferCastOp = dyn_cast<memref::BufferCastOp>(use.getOwner())) {
|
||||
if (auto toMemrefOp =
|
||||
dyn_cast<bufferization::ToMemrefOp>(use.getOwner())) {
|
||||
auto castOp = b.create<memref::CastOp>(
|
||||
funcOp.getLoc(), bufferCastOp.memref().getType(), memref);
|
||||
bufferCastOp.memref().replaceAllUsesWith(castOp);
|
||||
funcOp.getLoc(), toMemrefOp.memref().getType(), memref);
|
||||
toMemrefOp.memref().replaceAllUsesWith(castOp);
|
||||
aliasInfo.insertNewBufferEquivalence(castOp.dest(),
|
||||
bufferCastOp.memref());
|
||||
toMemrefOp.memref());
|
||||
}
|
||||
}
|
||||
// Replace all remaining uses by a tensor_load.
|
||||
if (!bbArg.use_empty()) {
|
||||
auto tensorLoadOp =
|
||||
b.create<memref::TensorLoadOp>(funcOp.getLoc(), memref);
|
||||
aliasInfo.insertNewBufferEquivalence(tensorLoadOp, bbArg);
|
||||
bbArg.replaceAllUsesWith(tensorLoadOp);
|
||||
auto toTensorOp =
|
||||
b.create<bufferization::ToTensorOp>(funcOp.getLoc(), memref);
|
||||
aliasInfo.insertNewBufferEquivalence(toTensorOp, bbArg);
|
||||
bbArg.replaceAllUsesWith(toTensorOp);
|
||||
}
|
||||
frontBlock.eraseArgument(0);
|
||||
// TODO: add support to erase aliasInfo entries if deemed necessary.
|
||||
|
@ -1376,16 +1378,16 @@ struct CallOpInterface
|
|||
// info.
|
||||
state.aliasInfo.insertNewBufferEquivalence(oldRes, buffer);
|
||||
state.mapBuffer(oldRes, buffer);
|
||||
// Add a TensorLoadOp to kill all uses of the CallOp return.
|
||||
// Add a ToTensorOp to kill all uses of the CallOp return.
|
||||
// Replace all uses of the CallOp results so we can erase the CallOp.
|
||||
// This TensorLoadOp must fold/DCE away or bufferization should be
|
||||
// This ToTensorOp must fold/DCE away or bufferization should be
|
||||
// considered failed.
|
||||
Value tensorLoad =
|
||||
b.create<memref::TensorLoadOp>(callOp.getLoc(), buffer);
|
||||
oldRes.replaceAllUsesWith(tensorLoad);
|
||||
Value toTensor =
|
||||
b.create<bufferization::ToTensorOp>(callOp.getLoc(), buffer);
|
||||
oldRes.replaceAllUsesWith(toTensor);
|
||||
// Add new op equivalence info.
|
||||
state.aliasInfo.insertNewBufferEquivalence(tensorLoad, buffer);
|
||||
state.mapBuffer(tensorLoad, buffer);
|
||||
state.aliasInfo.insertNewBufferEquivalence(toTensor, buffer);
|
||||
state.mapBuffer(toTensor, buffer);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
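A hedged sketch of the call-site rewrite described in the comments above (function name and types are placeholders):

```mlir
// The bufferized callee now returns a buffer; a to_tensor keeps any remaining
// tensor-typed uses of the old call result alive until they fold away.
%buffer = call @callee(%m) : (memref<?xf32>) -> memref<?xf32>
%t = bufferization.to_tensor %buffer : memref<?xf32>
```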
@ -1493,7 +1495,8 @@ struct ReturnOpInterface
|
|||
if (!tensorType)
|
||||
continue;
|
||||
Value v = state.lookupBuffer(operand.get());
|
||||
Value returnTensor = b.create<memref::TensorLoadOp>(returnOp.getLoc(), v);
|
||||
Value returnTensor =
|
||||
b.create<bufferization::ToTensorOp>(returnOp.getLoc(), v);
|
||||
operand.set(returnTensor);
|
||||
state.aliasInfo.insertNewBufferEquivalence(returnTensor, v);
|
||||
state.mapBuffer(returnTensor, v);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/IR/AffineExprVisitor.h"
|
||||
#include "mlir/IR/AffineMap.h"
|
||||
#include "mlir/IR/TypeUtilities.h"
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "mlir/Transforms/Bufferize.h"
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
|
||||
#include "mlir/Dialect/Linalg/Passes.h"
|
||||
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PassDetail.h"
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h"
|
||||
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
|
||||
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h"
|
||||
|
@ -37,8 +39,9 @@ struct LinalgComprehensiveModuleBufferize
|
|||
|
||||
void getDependentDialects(DialectRegistry ®istry) const override {
|
||||
registry
|
||||
.insert<linalg::LinalgDialect, memref::MemRefDialect,
|
||||
tensor::TensorDialect, vector::VectorDialect, scf::SCFDialect,
|
||||
.insert<bufferization::BufferizationDialect, linalg::LinalgDialect,
|
||||
memref::MemRefDialect, tensor::TensorDialect,
|
||||
vector::VectorDialect, scf::SCFDialect,
|
||||
arith::ArithmeticDialect, StandardOpsDialect, AffineDialect>();
|
||||
registerBufferizableOpInterfaceExternalModels(registry);
|
||||
arith_ext::registerBufferizableOpInterfaceExternalModels(registry);
|
||||
|
|
|
@ -22,6 +22,10 @@ namespace arith {
|
|||
class ArithmeticDialect;
|
||||
} // end namespace arith
|
||||
|
||||
namespace bufferization {
|
||||
class BufferizationDialect;
|
||||
} // end namespace bufferization
|
||||
|
||||
namespace linalg {
|
||||
class LinalgDialect;
|
||||
} // end namespace linalg
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/Transforms/InliningUtils.h"
|
||||
|
||||
using namespace mlir;
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/Dialect/Utils/StaticValueUtils.h"
|
||||
#include "mlir/IR/AffineMap.h"
|
||||
#include "mlir/IR/Builders.h"
|
||||
|
@ -45,7 +44,7 @@ Operation *MemRefDialect::materializeConstant(OpBuilder &builder,
|
|||
/// This is a common class used for patterns of the form
|
||||
/// "someop(memrefcast) -> someop". It folds the source of any memref.cast
|
||||
/// into the root operation directly.
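/// For illustration (a sketch, not part of this diff): given
///   %0 = memref.cast %src : memref<4xf32> to memref<?xf32>
///   %1 = "some_op"(%0) : (memref<?xf32>) -> memref<?xf32>
/// the helper replaces the %0 operand of "some_op" with %src directly.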
|
||||
static LogicalResult foldMemRefCast(Operation *op, Value inner = nullptr) {
|
||||
LogicalResult mlir::memref::foldMemRefCast(Operation *op, Value inner) {
|
||||
bool folded = false;
|
||||
for (OpOperand &operand : op->getOpOperands()) {
|
||||
auto cast = operand.get().getDefiningOp<CastOp>();
|
||||
|
@ -58,11 +57,9 @@ static LogicalResult foldMemRefCast(Operation *op, Value inner = nullptr) {
|
|||
return success(folded);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helpers for GlobalOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
static Type getTensorTypeFromMemRefType(Type type) {
|
||||
/// Return an unranked/ranked tensor type for the given unranked/ranked memref
|
||||
/// type.
|
||||
Type mlir::memref::getTensorTypeFromMemRefType(Type type) {
|
||||
if (auto memref = type.dyn_cast<MemRefType>())
|
||||
return RankedTensorType::get(memref.getShape(), memref.getElementType());
|
||||
if (auto memref = type.dyn_cast<UnrankedMemRefType>())
|
||||
|
@ -277,113 +274,6 @@ static LogicalResult verify(AssumeAlignmentOp op) {
|
|||
return success();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// BufferCastOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
OpFoldResult BufferCastOp::fold(ArrayRef<Attribute>) {
|
||||
if (auto tensorLoad = tensor().getDefiningOp<TensorLoadOp>())
|
||||
if (tensorLoad.memref().getType() == getType())
|
||||
return tensorLoad.memref();
|
||||
return {};
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// Replace tensor_cast + buffer_cast by buffer_cast + memref_cast.
|
||||
struct BufferCast : public OpRewritePattern<BufferCastOp> {
|
||||
using OpRewritePattern<BufferCastOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(BufferCastOp bufferCast,
|
||||
PatternRewriter &rewriter) const final {
|
||||
auto tensorCastOperand =
|
||||
bufferCast.getOperand().getDefiningOp<tensor::CastOp>();
|
||||
if (!tensorCastOperand)
|
||||
return failure();
|
||||
auto srcTensorType =
|
||||
tensorCastOperand.getOperand().getType().dyn_cast<RankedTensorType>();
|
||||
if (!srcTensorType)
|
||||
return failure();
|
||||
auto memrefType = MemRefType::get(srcTensorType.getShape(),
|
||||
srcTensorType.getElementType());
|
||||
Value memref = rewriter.create<BufferCastOp>(
|
||||
bufferCast.getLoc(), memrefType, tensorCastOperand.getOperand());
|
||||
rewriter.replaceOpWithNewOp<CastOp>(bufferCast, bufferCast.getType(),
|
||||
memref);
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
/// Canonicalize memref.tensor_load + memref.buffer_cast to memref.cast when
|
||||
/// type mismatches prevent `BufferCastOp::fold` from kicking in.
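/// For example (illustrative, matching the canonicalization tests added in
/// this change):
///   %t = memref.tensor_load %m : memref<?xf32, offset: 3, strides: [1]>
///   %b = memref.buffer_cast %t : memref<?xf32, offset: ?, strides: [1]>
/// rewrites to
///   %b = memref.cast %m : memref<?xf32, offset: 3, strides: [1]>
///                      to memref<?xf32, offset: ?, strides: [1]>
/// and falls back to an alloc + copy when the cast cannot be proven valid.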
|
||||
struct TensorLoadToMemRef : public OpRewritePattern<BufferCastOp> {
|
||||
using OpRewritePattern<BufferCastOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(BufferCastOp bufferCast,
|
||||
PatternRewriter &rewriter) const final {
|
||||
auto tensorLoad = bufferCast.tensor().getDefiningOp<TensorLoadOp>();
|
||||
// Bail unless we have a tensor_load + memref.buffer_cast with different
|
||||
// types. `BufferCastOp::fold` handles the same type case.
|
||||
if (!tensorLoad || tensorLoad.memref().getType() == bufferCast.getType())
|
||||
return failure();
|
||||
// If types are definitely not cast-compatible, bail.
|
||||
if (!CastOp::areCastCompatible(tensorLoad.memref().getType(),
|
||||
bufferCast.getType()))
|
||||
return failure();
|
||||
|
||||
// We already know that the types are potentially cast-compatible. However
|
||||
// in case the affine maps are different, we may need to use a copy if we go
|
||||
// from dynamic to static offset or stride (the canonicalization cannot know
|
||||
// at this point that it is really cast compatible).
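// For instance (illustrative): going from
//   memref<?xf32, offset: ?, strides: [1]> to memref<?xf32, offset: 3, strides: [1]>
// would assert a static offset the source may not actually have, so for that
// direction this pattern allocates a new buffer and copies instead of
// emitting a memref.cast.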
|
||||
auto isGuaranteedCastCompatible = [](MemRefType source, MemRefType target) {
|
||||
int64_t sourceOffset, targetOffset;
|
||||
SmallVector<int64_t, 4> sourceStrides, targetStrides;
|
||||
if (failed(getStridesAndOffset(source, sourceStrides, sourceOffset)) ||
|
||||
failed(getStridesAndOffset(target, targetStrides, targetOffset)))
|
||||
return false;
|
||||
auto dynamicToStatic = [](int64_t a, int64_t b) {
|
||||
return a == MemRefType::getDynamicStrideOrOffset() &&
|
||||
b != MemRefType::getDynamicStrideOrOffset();
|
||||
};
|
||||
if (dynamicToStatic(sourceOffset, targetOffset))
|
||||
return false;
|
||||
for (auto it : zip(sourceStrides, targetStrides))
|
||||
if (dynamicToStatic(std::get<0>(it), std::get<1>(it)))
|
||||
return false;
|
||||
return true;
|
||||
};
|
||||
|
||||
auto tensorLoadType = tensorLoad.memref().getType().dyn_cast<MemRefType>();
|
||||
auto bufferCastType = bufferCast.getType().dyn_cast<MemRefType>();
|
||||
if (tensorLoadType && bufferCastType &&
|
||||
!isGuaranteedCastCompatible(tensorLoadType, bufferCastType)) {
|
||||
MemRefType resultType = bufferCastType;
|
||||
auto loc = bufferCast.getLoc();
|
||||
SmallVector<Value, 4> dynamicOperands;
|
||||
for (int i = 0; i < resultType.getRank(); ++i) {
|
||||
if (resultType.getShape()[i] != ShapedType::kDynamicSize)
|
||||
continue;
|
||||
auto index = rewriter.createOrFold<arith::ConstantIndexOp>(loc, i);
|
||||
Value size = rewriter.create<tensor::DimOp>(loc, tensorLoad, index);
|
||||
dynamicOperands.push_back(size);
|
||||
}
|
||||
auto copy =
|
||||
rewriter.create<memref::AllocOp>(loc, resultType, dynamicOperands);
|
||||
rewriter.create<CopyOp>(loc, tensorLoad.memref(), copy);
|
||||
rewriter.replaceOp(bufferCast, {copy});
|
||||
} else
|
||||
rewriter.replaceOpWithNewOp<CastOp>(bufferCast, bufferCast.getType(),
|
||||
tensorLoad.memref());
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void BufferCastOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
MLIRContext *context) {
|
||||
results.add<BufferCast, TensorLoadToMemRef>(context);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CastOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -551,99 +441,6 @@ OpFoldResult CastOp::fold(ArrayRef<Attribute> operands) {
|
|||
return succeeded(foldMemRefCast(*this)) ? getResult() : Value();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CloneOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
void CloneOp::getEffects(
|
||||
SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
|
||||
&effects) {
|
||||
effects.emplace_back(MemoryEffects::Read::get(), input(),
|
||||
SideEffects::DefaultResource::get());
|
||||
effects.emplace_back(MemoryEffects::Write::get(), output(),
|
||||
SideEffects::DefaultResource::get());
|
||||
effects.emplace_back(MemoryEffects::Allocate::get(), output(),
|
||||
SideEffects::DefaultResource::get());
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// Merge the clone and its source (by converting the clone to a cast) when
|
||||
/// possible.
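/// For example (an illustrative sketch, mirroring the @clone_and_cast test in
/// this change):
///   %0 = bufferization.clone %arg0 : memref<?xf32> to memref<32xf32>
///   memref.dealloc %arg0 : memref<?xf32>
///   return %0 : memref<32xf32>
/// canonicalizes to a memref.cast of %arg0, with the now-redundant dealloc
/// erased.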
|
||||
struct SimplifyClones : public OpRewritePattern<CloneOp> {
|
||||
using OpRewritePattern<CloneOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(CloneOp cloneOp,
|
||||
PatternRewriter &rewriter) const override {
|
||||
if (cloneOp.use_empty()) {
|
||||
rewriter.eraseOp(cloneOp);
|
||||
return success();
|
||||
}
|
||||
|
||||
Value source = cloneOp.input();
|
||||
|
||||
// This only finds dealloc operations for the immediate value. It should
|
||||
// also consider aliases. That would also make the safety check below
|
||||
// redundant.
|
||||
llvm::Optional<Operation *> maybeCloneDeallocOp =
|
||||
findDealloc(cloneOp.output());
|
||||
// Skip if either of them has more than one deallocation.
|
||||
if (!maybeCloneDeallocOp.hasValue())
|
||||
return failure();
|
||||
llvm::Optional<Operation *> maybeSourceDeallocOp = findDealloc(source);
|
||||
if (!maybeSourceDeallocOp.hasValue())
|
||||
return failure();
|
||||
Operation *cloneDeallocOp = *maybeCloneDeallocOp;
|
||||
Operation *sourceDeallocOp = *maybeSourceDeallocOp;
|
||||
|
||||
// If both are deallocated in the same block, their in-block lifetimes
|
||||
// might not fully overlap, so we cannot decide which one to drop.
|
||||
if (cloneDeallocOp && sourceDeallocOp &&
|
||||
cloneDeallocOp->getBlock() == sourceDeallocOp->getBlock())
|
||||
return failure();
|
||||
|
||||
Block *currentBlock = cloneOp->getBlock();
|
||||
Operation *redundantDealloc = nullptr;
|
||||
if (cloneDeallocOp && cloneDeallocOp->getBlock() == currentBlock) {
|
||||
redundantDealloc = cloneDeallocOp;
|
||||
} else if (sourceDeallocOp && sourceDeallocOp->getBlock() == currentBlock) {
|
||||
redundantDealloc = sourceDeallocOp;
|
||||
}
|
||||
|
||||
if (!redundantDealloc)
|
||||
return failure();
|
||||
|
||||
// Safety check that there are no other deallocations in between
|
||||
// cloneOp and redundantDealloc, as otherwise we might deallocate an alias
|
||||
// of source before the uses of the clone. With alias information, we could
|
||||
// restrict this to only fail if the dealloc's operand is an alias
|
||||
// of the source.
|
||||
for (Operation *pos = cloneOp->getNextNode(); pos != redundantDealloc;
|
||||
pos = pos->getNextNode()) {
|
||||
auto effectInterface = dyn_cast<MemoryEffectOpInterface>(pos);
|
||||
if (!effectInterface)
|
||||
continue;
|
||||
if (effectInterface.hasEffect<MemoryEffects::Free>())
|
||||
return failure();
|
||||
}
|
||||
|
||||
rewriter.replaceOpWithNewOp<memref::CastOp>(cloneOp, cloneOp.getType(),
|
||||
source);
|
||||
rewriter.eraseOp(redundantDealloc);
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace.
|
||||
|
||||
void CloneOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
|
||||
MLIRContext *context) {
|
||||
results.insert<SimplifyClones>(context);
|
||||
}
|
||||
|
||||
OpFoldResult CloneOp::fold(ArrayRef<Attribute> operands) {
|
||||
return succeeded(foldMemRefCast(*this)) ? getResult() : Value();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// DeallocOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -875,25 +672,11 @@ struct DimOfMemRefReshape : public OpRewritePattern<DimOp> {
|
|||
}
|
||||
};
|
||||
|
||||
/// Fold dim of a cast into the dim of the source of the memref cast.
|
||||
struct DimOfCastOp : public OpRewritePattern<DimOp> {
|
||||
using OpRewritePattern<DimOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(DimOp dimOp,
|
||||
PatternRewriter &rewriter) const override {
|
||||
auto castOp = dimOp.source().getDefiningOp<BufferCastOp>();
|
||||
if (!castOp)
|
||||
return failure();
|
||||
Value newSource = castOp.getOperand();
|
||||
rewriter.replaceOpWithNewOp<tensor::DimOp>(dimOp, newSource, dimOp.index());
|
||||
return success();
|
||||
}
|
||||
};
|
||||
} // end anonymous namespace.
|
||||
|
||||
void DimOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
MLIRContext *context) {
|
||||
results.add<DimOfMemRefReshape, DimOfCastOp>(context);
|
||||
results.add<DimOfMemRefReshape>(context);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
@ -1215,30 +998,6 @@ OpFoldResult LoadOp::fold(ArrayRef<Attribute> cstOperands) {
|
|||
return OpFoldResult();
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// Fold a load on a buffer_cast operation into a tensor.extract on the
|
||||
/// corresponding tensor.
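/// For example (illustrative):
///   %m = memref.buffer_cast %t : memref<?x?xf32>
///   %v = memref.load %m[%i, %j] : memref<?x?xf32>
/// folds to
///   %v = tensor.extract %t[%i, %j] : tensor<?x?xf32>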
|
||||
struct LoadOfBufferCast : public OpRewritePattern<LoadOp> {
|
||||
using OpRewritePattern<LoadOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(LoadOp load,
|
||||
PatternRewriter &rewriter) const override {
|
||||
auto buffercast = load.memref().getDefiningOp<BufferCastOp>();
|
||||
if (!buffercast)
|
||||
return failure();
|
||||
|
||||
rewriter.replaceOpWithNewOp<tensor::ExtractOp>(load, buffercast.tensor(),
|
||||
load.indices());
|
||||
return success();
|
||||
}
|
||||
};
|
||||
} // end anonymous namespace.
|
||||
|
||||
void LoadOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
MLIRContext *context) {
|
||||
results.add<LoadOfBufferCast>(context);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PrefetchOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -2199,42 +1958,6 @@ OpFoldResult SubViewOp::fold(ArrayRef<Attribute> operands) {
|
|||
return {};
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TensorLoadOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
OpFoldResult TensorLoadOp::fold(ArrayRef<Attribute>) {
|
||||
if (auto bufferCast = memref().getDefiningOp<BufferCastOp>())
|
||||
// Approximate alias analysis by conservatively folding only when there
|
||||
// is no interleaved operation.
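// For example (illustrative), the fold applies to
//   %m = memref.buffer_cast %t : memref<?xf32>
//   %r = memref.tensor_load %m : memref<?xf32>
// only when the tensor_load immediately follows the buffer_cast in the same
// block, since any operation in between might write to %m.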
|
||||
if (bufferCast->getBlock() == this->getOperation()->getBlock() &&
|
||||
bufferCast->getNextNode() == this->getOperation())
|
||||
return bufferCast.tensor();
|
||||
return {};
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct DimOfTensorLoadFolder : public OpRewritePattern<tensor::DimOp> {
|
||||
using OpRewritePattern<tensor::DimOp>::OpRewritePattern;
|
||||
|
||||
LogicalResult matchAndRewrite(tensor::DimOp dimOp,
|
||||
PatternRewriter &rewriter) const override {
|
||||
auto tensorLoadOp = dimOp.source().getDefiningOp<TensorLoadOp>();
|
||||
if (!tensorLoadOp)
|
||||
return failure();
|
||||
|
||||
rewriter.replaceOpWithNewOp<DimOp>(dimOp, tensorLoadOp.memref(),
|
||||
dimOp.index());
|
||||
return success();
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void TensorLoadOp::getCanonicalizationPatterns(RewritePatternSet &results,
|
||||
MLIRContext *context) {
|
||||
results.add<DimOfTensorLoadFolder>(context);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TransposeOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -9,6 +9,7 @@ add_mlir_dialect_library(MLIRSCF
|
|||
|
||||
LINK_LIBS PUBLIC
|
||||
MLIRArithmetic
|
||||
MLIRBufferization
|
||||
MLIRIR
|
||||
MLIRLoopLikeInterface
|
||||
MLIRMemRef
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
|
@ -875,9 +876,10 @@ struct ForOpTensorCastFolder : public OpRewritePattern<ForOp> {
|
|||
}
|
||||
};
|
||||
|
||||
/// Canonicalize the iter_args of an scf::ForOp that involve a tensor_load and
|
||||
/// for which only the last loop iteration is actually visible outside of the
|
||||
/// loop. The canonicalization looks for a pattern such as:
|
||||
/// Canonicalize the iter_args of an scf::ForOp that involve a
|
||||
/// `bufferization.to_tensor` and for which only the last loop iteration is
|
||||
/// actually visible outside of the loop. The canonicalization looks for a
|
||||
/// pattern such as:
|
||||
/// ```
|
||||
/// %t0 = ... : tensor_type
|
||||
/// %0 = scf.for ... iter_args(%bb0 : %t0) -> (tensor_type) {
|
||||
|
@ -885,23 +887,25 @@ struct ForOpTensorCastFolder : public OpRewritePattern<ForOp> {
|
|||
/// // %m is either buffer_cast(%bb0) or defined above the loop
|
||||
/// %m... : memref_type
|
||||
/// ... // uses of %m with potential inplace updates
|
||||
/// %new_tensor = tensor_load %m : memref_type
|
||||
/// %new_tensor = bufferization.to_tensor %m : memref_type
|
||||
/// ...
|
||||
/// scf.yield %new_tensor : tensor_type
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// `%bb0` may have either 0 or 1 use. If it has 1 use it must be exactly a
|
||||
/// `%m = buffer_cast %bb0` op that feeds into the yielded `tensor_load`
|
||||
/// op.
|
||||
/// `%m = buffer_cast %bb0` op that feeds into the yielded
|
||||
/// `bufferization.to_tensor` op.
|
||||
///
|
||||
/// If no aliasing write to the memref `%m`, from which `%new_tensor` is loaded,
|
||||
/// occurs between tensor_load and yield then the value %0 visible outside of
|
||||
/// the loop is the last `tensor_load` produced in the loop.
|
||||
/// occurs between `bufferization.to_tensor` and yield, then the value %0
|
||||
/// visible outside of the loop is the last `bufferization.to_tensor`
|
||||
/// produced in the loop.
|
||||
///
|
||||
/// For now, we approximate the absence of aliasing by only supporting the case
|
||||
/// when the tensor_load is the operation immediately preceding the yield.
|
||||
///
|
||||
/// when the bufferization.to_tensor is the operation immediately preceding
|
||||
/// the yield.
|
||||
///
|
||||
/// The canonicalization rewrites the pattern as:
|
||||
/// ```
|
||||
/// // %m is either a buffer_cast or defined above
|
||||
|
@ -910,7 +914,7 @@ struct ForOpTensorCastFolder : public OpRewritePattern<ForOp> {
|
|||
/// ... // uses of %m with potential inplace updates
|
||||
/// scf.yield %bb0: tensor_type
|
||||
/// }
|
||||
/// %0 = tensor_load %m : memref_type
|
||||
/// %0 = bufferization.to_tensor %m : memref_type
|
||||
/// ```
|
||||
///
|
||||
/// A later bbArg canonicalization will further rewrite as:
|
||||
|
@ -920,7 +924,7 @@ struct ForOpTensorCastFolder : public OpRewritePattern<ForOp> {
|
|||
/// scf.for ... { // no iter_args
|
||||
/// ... // uses of %m with potential inplace updates
|
||||
/// }
|
||||
/// %0 = tensor_load %m : memref_type
|
||||
/// %0 = bufferization.to_tensor %m : memref_type
|
||||
/// ```
|
||||
struct LastTensorLoadCanonicalization : public OpRewritePattern<ForOp> {
|
||||
using OpRewritePattern<ForOp>::OpRewritePattern;
|
||||
|
@ -936,39 +940,39 @@ struct LastTensorLoadCanonicalization : public OpRewritePattern<ForOp> {
|
|||
unsigned idx = bbArg.getArgNumber() - /*numIv=*/1;
|
||||
auto yieldOp = cast<scf::YieldOp>(forOp.region().front().getTerminator());
|
||||
Value yieldVal = yieldOp->getOperand(idx);
|
||||
auto tensorLoadOp = yieldVal.getDefiningOp<memref::TensorLoadOp>();
|
||||
auto tensorLoadOp = yieldVal.getDefiningOp<bufferization::ToTensorOp>();
|
||||
bool isTensor = bbArg.getType().isa<TensorType>();
|
||||
|
||||
memref::BufferCastOp bufferCastOp;
|
||||
bufferization::ToMemrefOp tensorToMemref;
|
||||
// Either bbArg has no use or it has a single buffer_cast use.
|
||||
if (bbArg.hasOneUse())
|
||||
bufferCastOp =
|
||||
dyn_cast<memref::BufferCastOp>(*bbArg.getUsers().begin());
|
||||
if (!isTensor || !tensorLoadOp || (!bbArg.use_empty() && !bufferCastOp))
|
||||
tensorToMemref =
|
||||
dyn_cast<bufferization::ToMemrefOp>(*bbArg.getUsers().begin());
|
||||
if (!isTensor || !tensorLoadOp || (!bbArg.use_empty() && !tensorToMemref))
|
||||
continue;
|
||||
// If bufferCastOp is present, it must feed into the `tensorLoadOp`.
|
||||
if (bufferCastOp && tensorLoadOp.memref() != bufferCastOp)
|
||||
// If tensorToMemref is present, it must feed into the `ToTensorOp`.
|
||||
if (tensorToMemref && tensorLoadOp.memref() != tensorToMemref)
|
||||
continue;
|
||||
// TODO: Any aliasing write of tensorLoadOp.memref() nested under `forOp`
|
||||
// must be before `tensorLoadOp` in the block so that the lastWrite
|
||||
// must be before `ToTensorOp` in the block so that the lastWrite
|
||||
// property is not subject to additional side-effects.
|
||||
// For now, we only support the case when tensorLoadOp appears immediately
|
||||
// before the terminator.
|
||||
// For now, we only support the case when ToTensorOp appears
|
||||
// immediately before the terminator.
|
||||
if (tensorLoadOp->getNextNode() != yieldOp)
|
||||
continue;
|
||||
|
||||
// Clone the optional bufferCastOp before forOp.
|
||||
if (bufferCastOp) {
|
||||
// Clone the optional tensorToMemref before forOp.
|
||||
if (tensorToMemref) {
|
||||
rewriter.setInsertionPoint(forOp);
|
||||
rewriter.replaceOpWithNewOp<memref::BufferCastOp>(
|
||||
bufferCastOp, bufferCastOp.memref().getType(),
|
||||
bufferCastOp.tensor());
|
||||
rewriter.replaceOpWithNewOp<bufferization::ToMemrefOp>(
|
||||
tensorToMemref, tensorToMemref.memref().getType(),
|
||||
tensorToMemref.tensor());
|
||||
}
|
||||
|
||||
// Clone the tensorLoad after forOp.
|
||||
rewriter.setInsertionPointAfter(forOp);
|
||||
Value newTensorLoad =
|
||||
rewriter.create<memref::TensorLoadOp>(loc, tensorLoadOp.memref());
|
||||
Value newTensorLoad = rewriter.create<bufferization::ToTensorOp>(
|
||||
loc, tensorLoadOp.memref());
|
||||
Value forOpResult = forOp.getResult(bbArg.getArgNumber() - /*iv=*/1);
|
||||
replacements.insert(std::make_pair(forOpResult, newTensorLoad));
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "mlir/Transforms/Bufferize.h"
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/SCF/Passes.h"
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
|
|
|
@ -22,6 +22,10 @@ namespace arith {
|
|||
class ArithmeticDialect;
|
||||
} // end namespace arith
|
||||
|
||||
namespace bufferization {
|
||||
class BufferizationDialect;
|
||||
} // end namespace bufferization
|
||||
|
||||
namespace memref {
|
||||
class MemRefDialect;
|
||||
} // end namespace memref
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "mlir/Transforms/Bufferize.h"
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Shape/Transforms/Passes.h"
|
||||
#include "mlir/Pass/Pass.h"
|
||||
|
|
|
@ -13,6 +13,10 @@
|
|||
|
||||
namespace mlir {
|
||||
|
||||
namespace bufferization {
|
||||
class BufferizationDialect;
|
||||
} // end namespace bufferization
|
||||
|
||||
namespace memref {
|
||||
class MemRefDialect;
|
||||
} // end namespace memref
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
||||
#include "mlir/Dialect/Linalg/Utils/Utils.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
|
@ -659,7 +660,7 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
|
|||
insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, rank, ind);
|
||||
rewriter.create<scf::YieldOp>(loc);
|
||||
rewriter.setInsertionPointAfter(whileOp);
|
||||
rewriter.replaceOpWithNewOp<memref::TensorLoadOp>(op, resType, dst);
|
||||
rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(op, resType, dst);
|
||||
return success();
|
||||
}
|
||||
if (!encDst && !encSrc) {
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
||||
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
|
||||
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
|
||||
|
@ -120,8 +121,9 @@ struct SparseTensorConversionPass
|
|||
target.addLegalOp<arith::CmpFOp, arith::CmpIOp, arith::ConstantOp,
|
||||
arith::IndexCastOp, linalg::FillOp, linalg::YieldOp,
|
||||
tensor::ExtractOp>();
|
||||
target.addLegalDialect<LLVM::LLVMDialect, memref::MemRefDialect,
|
||||
scf::SCFDialect>();
|
||||
target
|
||||
.addLegalDialect<bufferization::BufferizationDialect, LLVM::LLVMDialect,
|
||||
memref::MemRefDialect, scf::SCFDialect>();
|
||||
// Populate with rules and apply rewriting rules.
|
||||
populateFuncOpTypeConversionPattern(patterns, converter);
|
||||
populateCallOpTypeConversionPattern(patterns, converter);
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
|
||||
#include "mlir/Dialect/Linalg/Utils/Utils.h"
|
||||
|
@ -457,7 +458,7 @@ static Value genOutputBuffer(CodeGen &codegen, PatternRewriter &rewriter,
|
|||
// the major advantage that the sparse kernel only updates the nonzero
|
||||
// positions for the output tensor.
|
||||
if (isInPlace(tensor))
|
||||
return rewriter.create<memref::BufferCastOp>(loc, denseTp, tensor);
|
||||
return rewriter.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
|
||||
// By default, a new buffer is allocated which is initialized to the
|
||||
// tensor defined in the outs() clause. This is always correct but
|
||||
// introduces a dense initialization component that may negatively
|
||||
|
@ -472,7 +473,7 @@ static Value genOutputBuffer(CodeGen &codegen, PatternRewriter &rewriter,
|
|||
rewriter.create<linalg::FillOp>(loc, zero, alloc);
|
||||
return alloc;
|
||||
}
|
||||
Value init = rewriter.create<memref::BufferCastOp>(loc, denseTp, tensor);
|
||||
Value init = rewriter.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
|
||||
Value alloc = rewriter.create<memref::AllocOp>(loc, denseTp, args);
|
||||
rewriter.create<memref::CopyOp>(loc, init, alloc);
|
||||
return alloc;
|
||||
|
@ -532,7 +533,7 @@ static void genBuffers(Merger &merger, CodeGen &codegen,
|
|||
auto denseTp = MemRefType::get(shape, elementType);
|
||||
if (tensor < op.getNumInputs())
|
||||
codegen.buffers[tensor] =
|
||||
rewriter.create<memref::BufferCastOp>(loc, denseTp, t->get());
|
||||
rewriter.create<bufferization::ToMemrefOp>(loc, denseTp, t->get());
|
||||
else
|
||||
codegen.buffers[tensor] =
|
||||
genOutputBuffer(codegen, rewriter, op, denseTp, args);
|
||||
|
@ -1466,7 +1467,7 @@ static void genResult(Merger &merger, CodeGen &codegen,
|
|||
// To rematerialize a non-annotated tensor, simply load it
|
||||
// from the bufferized value.
|
||||
Value val = codegen.buffers.back(); // value array
|
||||
rewriter.replaceOpWithNewOp<memref::TensorLoadOp>(op, resType, val);
|
||||
rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(op, resType, val);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
#include "mlir/Transforms/Bufferize.h"
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h"
|
||||
#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
|
||||
|
@ -42,7 +42,8 @@ struct FuncBufferizePass : public FuncBufferizeBase<FuncBufferizePass> {
|
|||
|
||||
populateBranchOpInterfaceTypeConversionPattern(patterns, typeConverter);
|
||||
populateReturnOpTypeConversionPattern(patterns, typeConverter);
|
||||
target.addLegalOp<ModuleOp, memref::TensorLoadOp, memref::BufferCastOp>();
|
||||
target.addLegalOp<ModuleOp, bufferization::ToTensorOp,
|
||||
bufferization::ToMemrefOp>();
|
||||
|
||||
target.markUnknownOpDynamicallyLegal([&](Operation *op) {
|
||||
return isNotBranchOpInterfaceOrReturnLikeOp(op) ||
|
||||
|
|
|
@ -16,6 +16,10 @@ namespace mlir {
|
|||
|
||||
class AtomicRMWOp;
|
||||
|
||||
namespace bufferization {
|
||||
class BufferizationDialect;
|
||||
} // end namespace bufferization
|
||||
|
||||
namespace memref {
|
||||
class MemRefDialect;
|
||||
} // end namespace memref
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "mlir/Transforms/Bufferize.h"
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/SCF/SCF.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
|
|
|
@ -13,6 +13,10 @@
|
|||
|
||||
namespace mlir {
|
||||
|
||||
namespace bufferization {
|
||||
class BufferizationDialect;
|
||||
} // end namespace bufferization
|
||||
|
||||
namespace memref {
|
||||
class MemRefDialect;
|
||||
} // end namespace memref
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/Dialect/Vector/VectorTransforms.h"
|
||||
#include "mlir/Interfaces/VectorInterfaces.h"
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@
|
|||
#include "PassDetail.h"
|
||||
|
||||
#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/StandardOps/IR/Ops.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
|
@ -581,7 +582,7 @@ private:
|
|||
/// Builds a clone operation compatible with the given allocation value. If
|
||||
/// there is no registered AllocationOpInterface implementation for the given
|
||||
/// value (e.g. in the case of a function parameter), this method builds a
|
||||
/// memref::CloneOp.
|
||||
/// bufferization::CloneOp.
|
||||
FailureOr<Value> buildClone(Operation *op, Value alloc) {
|
||||
OpBuilder builder(op);
|
||||
auto it = aliasToAllocations.find(alloc);
|
||||
|
@ -596,7 +597,8 @@ private:
|
|||
"are not supported");
|
||||
}
|
||||
// Build a "default" CloneOp for unknown allocation sources.
|
||||
return builder.create<memref::CloneOp>(alloc.getLoc(), alloc).getResult();
|
||||
return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
|
||||
.getResult();
|
||||
}
|
||||
|
||||
/// The dominator info to find the appropriate start operation to move the
|
||||
|
@ -618,14 +620,17 @@ private:
|
|||
// BufferDeallocationPass
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
template <typename T>
|
||||
struct DefaultAllocationInterface
|
||||
: public bufferization::AllocationOpInterface::ExternalModel<
|
||||
DefaultAllocationInterface<T>, T> {
|
||||
DefaultAllocationInterface, memref::AllocOp> {
|
||||
static Optional<Operation *> buildDealloc(OpBuilder &builder, Value alloc) {
|
||||
return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
|
||||
.getOperation();
|
||||
}
|
||||
static Optional<Value> buildClone(OpBuilder &builder, Value alloc) {
|
||||
return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
|
||||
.getResult();
|
||||
}
|
||||
};
|
||||
|
||||
/// The actual buffer deallocation pass that inserts and moves dealloc nodes
|
||||
|
@ -633,11 +638,9 @@ struct DefaultAllocationInterface
|
|||
/// necessary. It uses the algorithm described at the top of the file.
|
||||
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
|
||||
void getDependentDialects(DialectRegistry ®istry) const override {
|
||||
registry.insert<bufferization::BufferizationDialect>();
|
||||
registry.insert<memref::MemRefDialect>();
|
||||
registry.addOpInterface<memref::AllocOp,
|
||||
DefaultAllocationInterface<memref::AllocOp>>();
|
||||
registry.addOpInterface<memref::CloneOp,
|
||||
DefaultAllocationInterface<memref::CloneOp>>();
|
||||
registry.addOpInterface<memref::AllocOp, DefaultAllocationInterface>();
|
||||
}
|
||||
|
||||
void runOnFunction() override {
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
#include "mlir/Transforms/Bufferize.h"
|
||||
#include "PassDetail.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
|
||||
|
@ -22,7 +22,7 @@ static Value materializeTensorLoad(OpBuilder &builder, TensorType type,
|
|||
ValueRange inputs, Location loc) {
|
||||
assert(inputs.size() == 1);
|
||||
assert(inputs[0].getType().isa<BaseMemRefType>());
|
||||
return builder.create<memref::TensorLoadOp>(loc, type, inputs[0]);
|
||||
return builder.create<bufferization::ToTensorOp>(loc, type, inputs[0]);
|
||||
}
|
||||
|
||||
/// Registers conversions into BufferizeTypeConverter
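/// For illustration (a sketch, not part of this diff), the registered
/// materializations show up in partially bufferized IR as, e.g.:
///   %m = bufferization.to_memref %t : memref<4xf32>
///   %r = bufferization.to_tensor %b : memref<4xf32>
/// and the finalizing patterns below fold both away as identities once every
/// type has been converted.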
|
||||
|
@ -43,22 +43,23 @@ BufferizeTypeConverter::BufferizeTypeConverter() {
|
|||
ValueRange inputs, Location loc) -> Value {
|
||||
assert(inputs.size() == 1);
|
||||
assert(inputs[0].getType().isa<TensorType>());
|
||||
return builder.create<memref::BufferCastOp>(loc, type, inputs[0]);
|
||||
return builder.create<bufferization::ToMemrefOp>(loc, type, inputs[0]);
|
||||
});
|
||||
}
|
||||
|
||||
void mlir::populateBufferizeMaterializationLegality(ConversionTarget &target) {
|
||||
target.addLegalOp<memref::TensorLoadOp, memref::BufferCastOp>();
|
||||
target.addLegalOp<bufferization::ToTensorOp, bufferization::ToMemrefOp>();
|
||||
}
|
||||
|
||||
namespace {
|
||||
// In a finalizing bufferize conversion, we know that all tensors have been
|
||||
// converted to memrefs, thus, this op becomes an identity.
|
||||
class BufferizeTensorLoadOp : public OpConversionPattern<memref::TensorLoadOp> {
|
||||
class BufferizeTensorLoadOp
|
||||
: public OpConversionPattern<bufferization::ToTensorOp> {
|
||||
public:
|
||||
using OpConversionPattern::OpConversionPattern;
|
||||
LogicalResult
|
||||
matchAndRewrite(memref::TensorLoadOp op, OpAdaptor adaptor,
|
||||
matchAndRewrite(bufferization::ToTensorOp op, OpAdaptor adaptor,
|
||||
ConversionPatternRewriter &rewriter) const override {
|
||||
rewriter.replaceOp(op, adaptor.memref());
|
||||
return success();
|
||||
|
@ -69,11 +70,11 @@ public:
|
|||
namespace {
|
||||
// In a finalizing bufferize conversion, we know that all tensors have been
|
||||
// converted to memrefs, thus, this op becomes an identity.
|
||||
class BufferizeCastOp : public OpConversionPattern<memref::BufferCastOp> {
|
||||
class BufferizeCastOp : public OpConversionPattern<bufferization::ToMemrefOp> {
|
||||
public:
|
||||
using OpConversionPattern::OpConversionPattern;
|
||||
LogicalResult
|
||||
matchAndRewrite(memref::BufferCastOp op, OpAdaptor adaptor,
|
||||
matchAndRewrite(bufferization::ToMemrefOp op, OpAdaptor adaptor,
|
||||
ConversionPatternRewriter &rewriter) const override {
|
||||
rewriter.replaceOp(op, adaptor.tensor());
|
||||
return success();
|
||||
|
|
|
@ -32,7 +32,7 @@ add_mlir_library(MLIRTransforms
|
|||
LINK_LIBS PUBLIC
|
||||
MLIRAffine
|
||||
MLIRAnalysis
|
||||
MLIRAllocationOpInterface
|
||||
MLIRBufferization
|
||||
MLIRCopyOpInterface
|
||||
MLIRLoopLikeInterface
|
||||
MLIRMemRef
|
||||
|
|
|
@ -7,8 +7,8 @@ func @index_cast(%tensor: tensor<i32>, %scalar: i32) -> (tensor<index>, index) {
|
|||
%index_scalar = arith.index_cast %scalar : i32 to index
|
||||
return %index_tensor, %index_scalar : tensor<index>, index
|
||||
}
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<i32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<i32>
|
||||
// CHECK-NEXT: %[[INDEX_MEMREF:.*]] = arith.index_cast %[[MEMREF]]
|
||||
// CHECK-SAME: memref<i32> to memref<index>
|
||||
// CHECK-NEXT: %[[INDEX_TENSOR:.*]] = memref.tensor_load %[[INDEX_MEMREF]]
|
||||
// CHECK-NEXT: %[[INDEX_TENSOR:.*]] = bufferization.to_tensor %[[INDEX_MEMREF]]
|
||||
// CHECK: return %[[INDEX_TENSOR]]
|
||||
|
|
|
@ -0,0 +1,245 @@
|
|||
// RUN: mlir-opt %s -canonicalize --split-input-file \
|
||||
// RUN: -allow-unregistered-dialect |\
|
||||
// RUN: FileCheck %s
|
||||
|
||||
// Basic folding of to_tensor(to_memref(t)) -> t
|
||||
// CHECK-LABEL: func @tensor_load_of_buffer_cast(
|
||||
func @tensor_load_of_buffer_cast(%arg0: tensor<?xf32>) -> tensor<?xf32> {
|
||||
%0 = bufferization.to_memref %arg0 : memref<?xf32>
|
||||
%1 = bufferization.to_tensor %0 : memref<?xf32>
|
||||
return %1 : tensor<?xf32>
|
||||
}
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<?xf32>) -> tensor<?xf32> {
|
||||
// CHECK: return %[[TENSOR]]
|
||||
|
||||
// -----
|
||||
|
||||
// Basic folding of to_memref(to_tensor(m)) -> m
|
||||
// CHECK-LABEL: func @buffer_cast_of_tensor_load(
|
||||
func @buffer_cast_of_tensor_load(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
%0 = bufferization.to_tensor %arg0 : memref<?xf32>
|
||||
%1 = bufferization.to_memref %0 : memref<?xf32>
|
||||
return %1 : memref<?xf32>
|
||||
}
|
||||
// CHECK-SAME: %[[MEMREF:.*]]: memref<?xf32>) -> memref<?xf32> {
|
||||
// CHECK: return %[[MEMREF]]
|
||||
|
||||
// -----
|
||||
|
||||
// If the memrefs are not the same type, don't fold them.
|
||||
// If the memrefs are not cast-compatible (e.g. different address space), don't
|
||||
// canonicalize them either.
|
||||
// CHECK-LABEL: func @no_fold_buffer_cast_of_tensor_load(
|
||||
// CHECK-SAME: %[[MEMREF_ADDRSPACE2:.*]]: memref<?xf32, 2>)
|
||||
// CHECK-SAME: -> memref<?xf32, 7> {
|
||||
// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor
|
||||
// CHECK-SAME: %[[MEMREF_ADDRSPACE2]] : memref<?xf32, 2>
|
||||
// CHECK: %[[MEMREF_ADDRSPACE7:.*]] = bufferization.to_memref
|
||||
// CHECK-SAME: %[[TENSOR]] : memref<?xf32, 7>
|
||||
// CHECK: return %[[MEMREF_ADDRSPACE7]]
|
||||
func @no_fold_buffer_cast_of_tensor_load(%arg0: memref<?xf32, 2>)
|
||||
-> memref<?xf32, 7> {
|
||||
%0 = bufferization.to_tensor %arg0 : memref<?xf32, 2>
|
||||
%1 = bufferization.to_memref %0 : memref<?xf32, 7>
|
||||
return %1 : memref<?xf32, 7>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-DAG: #[[$OFF_3:[a-z0-9]+]] = affine_map<(d0) -> (d0 + 3)>
|
||||
// CHECK-DAG: #[[$OFF_UNK:[a-z0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0)>
|
||||
|
||||
// If the memrefs are definitely cast-compatible, canonicalize to
|
||||
// cast.
|
||||
// CHECK-LABEL: func @canonicalize_buffer_cast_of_tensor_load(
|
||||
// CHECK-SAME: %[[M:.*]]: memref<?xf32, #[[$OFF_3]]>)
|
||||
// CHECK-SAME: -> memref<?xf32, #[[$OFF_UNK]]> {
|
||||
// CHECK-NOT: bufferization.to_tensor
|
||||
// CHECK-NOT: bufferization.to_memref
|
||||
// CHECK: %[[R:.*]] = memref.cast %[[M]]
|
||||
// CHECK-SAME: memref<?xf32, #[[$OFF_3]]> to memref<?xf32, #[[$OFF_UNK]]>
|
||||
// CHECK: return %[[R]]
|
||||
func @canonicalize_buffer_cast_of_tensor_load(
|
||||
%arg0: memref<?xf32, offset: 3, strides: [1]>)
|
||||
-> memref<?xf32, offset: ?, strides: [1]>
|
||||
{
|
||||
%0 = bufferization.to_tensor %arg0 : memref<?xf32, offset: 3, strides: [1]>
|
||||
%1 = bufferization.to_memref %0 : memref<?xf32, offset: ?, strides: [1]>
|
||||
return %1 : memref<?xf32, offset: ?, strides: [1]>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-DAG: #[[$OFF_UNK:[a-z0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0)>
|
||||
// CHECK-DAG: #[[$OFF_3:[a-z0-9]+]] = affine_map<(d0) -> (d0 + 3)>
|
||||
|
||||
// If the memrefs are potentially cast-compatible, canonicalize to
|
||||
// copy.
|
||||
// CHECK-LABEL: func @canonicalize_buffer_cast_of_tensor_load_to_copy(
|
||||
func @canonicalize_buffer_cast_of_tensor_load_to_copy(
|
||||
%arg0: memref<?xf32, offset: ?, strides: [1]>)
|
||||
-> memref<?xf32, offset: 3, strides: [1]> {
|
||||
%0 = bufferization.to_tensor %arg0 : memref<?xf32, offset: ?, strides: [1]>
|
||||
%1 = bufferization.to_memref %0 : memref<?xf32, offset: 3, strides: [1]>
|
||||
return %1 : memref<?xf32, offset: 3, strides: [1]>
|
||||
}
|
||||
// CHECK-SAME: %[[M:.*]]: memref<?xf32, #[[$OFF_UNK]]>)
|
||||
// CHECK-SAME: -> memref<?xf32, #[[$OFF_3]]> {
|
||||
// CHECK-NOT: bufferization.to_tensor
|
||||
// CHECK-NOT: bufferization.to_memref
|
||||
// CHECK: %[[C0:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[DIM:.*]] = memref.dim %[[M]], %[[C0]] : memref<?xf32, #[[$OFF_UNK]]>
|
||||
// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM]]) : memref<?xf32, #[[$OFF_3]]>
|
||||
// CHECK: memref.copy %[[M]], %[[ALLOC]]
|
||||
// CHECK-SAME: memref<?xf32, #[[$OFF_UNK]]> to memref<?xf32, #[[$OFF_3]]>
|
||||
// CHECK: return %[[ALLOC]]
|
||||
|
||||
// -----
|
||||
|
||||
|
||||
// Basic folding of tensor.dim(to_tensor(m)) -> memref.dim(m).
|
||||
// CHECK-LABEL: func @dim_of_tensor_load(
|
||||
// CHECK-SAME: %[[MEMREF:[0-9a-z]*]]: memref<?xf32>
|
||||
// CHECK: %[[C0:.*]] = arith.constant 0
|
||||
// CHECK: %[[D:.*]] = memref.dim %[[MEMREF]], %[[C0]]
|
||||
// CHECK: return %[[D]] : index
|
||||
func @dim_of_tensor_load(%arg0: memref<?xf32>) -> index {
|
||||
%c0 = arith.constant 0 : index
|
||||
%0 = bufferization.to_tensor %arg0 : memref<?xf32>
|
||||
%1 = tensor.dim %0, %c0 : tensor<?xf32>
|
||||
return %1 : index
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @clone_before_dealloc
|
||||
func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
%0 = bufferization.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
return %0 : memref<?xf32>
|
||||
}
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
// CHECK-NEXT: return %[[ARG]]
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @clone_before_dealloc
|
||||
func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
%0 = bufferization.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
"use"(%0) : (memref<?xf32>) -> ()
|
||||
memref.dealloc %0 : memref<?xf32>
|
||||
return %arg0 : memref<?xf32>
|
||||
}
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
// CHECK-NEXT: "use"(%arg0)
|
||||
// CHECK-NEXT: return %[[ARG]]
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @clone_after_cast
|
||||
func @clone_after_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
|
||||
%0 = memref.cast %arg0 : memref<?xf32> to memref<32xf32>
|
||||
%1 = bufferization.clone %0 : memref<32xf32> to memref<32xf32>
|
||||
return %1 : memref<32xf32>
|
||||
}
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
// CHECK-NEXT: bufferization.clone %[[ARG]] : memref<?xf32> to memref<32xf32>
|
||||
// CHECK-NOT: memref.cast
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @clone_and_cast
|
||||
func @clone_and_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
|
||||
%0 = bufferization.clone %arg0 : memref<?xf32> to memref<32xf32>
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
return %0 : memref<32xf32>
|
||||
}
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
// CHECK-NEXT: %[[RES:.*]] = memref.cast %[[ARG]]
|
||||
// CHECK-SAME: memref<?xf32> to memref<32xf32>
|
||||
// CHECK-NEXT: return %[[RES]]
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @alias_is_freed
|
||||
func @alias_is_freed(%arg0 : memref<?xf32>) {
|
||||
%0 = memref.cast %arg0 : memref<?xf32> to memref<32xf32>
|
||||
%1 = bufferization.clone %0 : memref<32xf32> to memref<32xf32>
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
"use"(%1) : (memref<32xf32>) -> ()
|
||||
memref.dealloc %1 : memref<32xf32>
|
||||
return
|
||||
}
|
||||
// CHECK: bufferization.clone
|
||||
// CHECK: memref.dealloc
|
||||
// CHECK: memref.dealloc
|
||||
|
||||
// -----
|
||||
|
||||
// Verify SimplifyClones skips clones with multiple deallocations.
|
||||
// CHECK-LABEL: @clone_multiple_dealloc_of_source
|
||||
func @clone_multiple_dealloc_of_source(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
%0 = bufferization.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
"if_else"() ({
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
}, {
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
}) : () -> ()
|
||||
return %0 : memref<?xf32>
|
||||
}
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
// CHECK-NEXT: %[[RES:.*]] = bufferization.clone %[[ARG]]
|
||||
// CHECK: memref.dealloc %[[ARG]]
|
||||
// CHECK: memref.dealloc %[[ARG]]
|
||||
// CHECK: return %[[RES]]
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: @clone_multiple_dealloc_of_clone
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
func @clone_multiple_dealloc_of_clone(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
// CHECK-NEXT: %[[CLONE:.*]] = bufferization.clone %[[ARG]]
|
||||
// CHECK: memref.dealloc %[[CLONE]]
|
||||
// CHECK: memref.dealloc %[[CLONE]]
|
||||
// CHECK: return %[[ARG]]
|
||||
%0 = bufferization.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
"use"(%0) : (memref<?xf32>) -> ()
|
||||
"if_else"() ({
|
||||
memref.dealloc %0 : memref<?xf32>
|
||||
}, {
|
||||
memref.dealloc %0 : memref<?xf32>
|
||||
}) : () -> ()
|
||||
return %arg0 : memref<?xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
|
||||
// CHECK-LABEL: func @tensor_cast_to_memref
|
||||
// CHECK-SAME: %[[ARG0:.+]]: tensor<4x6x16x32xi8>
|
||||
func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) ->
|
||||
memref<?x?x16x32xi8> {
|
||||
%0 = tensor.cast %arg0 : tensor<4x6x16x32xi8> to tensor<?x?x16x32xi8>
|
||||
%1 = bufferization.to_memref %0 : memref<?x?x16x32xi8>
|
||||
return %1 : memref<?x?x16x32xi8>
|
||||
}
|
||||
// CHECK: %[[M:.+]] = bufferization.to_memref %[[ARG0]] : memref<4x6x16x32xi8>
|
||||
// CHECK: %[[M1:.+]] = memref.cast %[[M]]
|
||||
// CHECK-SAME: memref<4x6x16x32xi8> to memref<?x?x16x32xi8>
|
||||
// CHECK: return %[[M1]] : memref<?x?x16x32xi8>
|
||||
|
||||
// -----
|
||||
|
||||
// Folding of memref.load(to_memref(%v, %idxs)) -> tensor.extract(%v, %idx)
|
||||
// CHECK-LABEL: func @load_from_buffer_cast(
|
||||
func @load_from_buffer_cast(%arg0: index, %arg1: index,
|
||||
%arg2: tensor<?x?xf32>) -> f32 {
|
||||
%0 = bufferization.to_memref %arg2 : memref<?x?xf32>
|
||||
%1 = memref.load %0[%arg0, %arg1] : memref<?x?xf32>
|
||||
return %1 : f32
|
||||
}
|
||||
// CHECK-SAME: %[[IDX0:[0-9a-z]+]]: index, %[[IDX1:[0-9a-z]+]]: index
|
||||
// CHECK-SAME: %[[TENSOR:[0-9a-z]+]]: tensor<?x?xf32>
|
||||
// CHECK: %[[RES:.*]] = tensor.extract %[[TENSOR]][%[[IDX0]], %[[IDX1]]]
|
||||
// CHECK-NOT: memref.load
|
||||
// CHECK: return %[[RES]] : f32
|
|
@ -0,0 +1,24 @@
|
|||
// RUN: mlir-opt %s | mlir-opt | FileCheck %s
|
||||
// RUN: mlir-opt %s --mlir-print-op-generic | mlir-opt | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: func @test_clone
|
||||
func @test_clone(%buf : memref<*xf32>) -> memref<*xf32> {
|
||||
%clone = bufferization.clone %buf : memref<*xf32> to memref<*xf32>
|
||||
return %clone : memref<*xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_to_memref
|
||||
func @test_to_memref(%arg0: tensor<?xi64>, %arg1: tensor<*xi64>)
|
||||
-> (memref<?xi64, affine_map<(d0) -> (d0 + 7)>>, memref<*xi64, 1>) {
|
||||
%0 = bufferization.to_memref %arg0
|
||||
: memref<?xi64, affine_map<(d0) -> (d0 + 7)>>
|
||||
%1 = bufferization.to_memref %arg1
|
||||
: memref<*xi64, 1>
|
||||
return %0, %1 : memref<?xi64, affine_map<(d0) -> (d0 + 7)>>, memref<*xi64, 1>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @test_to_tensor
|
||||
func @test_to_tensor(%buf : memref<2xf32>) -> tensor<2xf32> {
|
||||
%tensor = bufferization.to_tensor %buf : memref<2xf32>
|
||||
return %tensor : tensor<2xf32>
|
||||
}
|
|
@ -3,7 +3,8 @@
|
|||
#map0 = affine_map<(d0) -> (d0)>
|
||||
|
||||
// In-depth checking of a basic case; this is testing
|
||||
// - memref.buffer_cast / memref.tensor_load materializations are properly inserted
|
||||
// - bufferization.to_memref / bufferization.to_tensor materializations are
|
||||
// properly inserted
|
||||
// - payload is correctly carried over
|
||||
// - affine maps are correctly carried over
|
||||
// Later tests will not check all these details.
|
||||
|
@ -11,7 +12,7 @@
|
|||
// CHECK: #map = affine_map<(d0) -> (d0)>
|
||||
// CHECK-LABEL: func @basic(
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<4xf32>) -> tensor<4xf32> {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<4xf32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<4xf32>
|
||||
// CHECK: %[[RESULT_MEMREF:.*]] = memref.alloc() : memref<4xf32>
|
||||
// CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]}
|
||||
// CHECK-SAME: ins(%[[MEMREF]] : memref<4xf32>)
|
||||
|
@ -20,7 +21,7 @@
|
|||
// CHECK: %[[DIM1:.*]] = math.exp %[[RESULT1]] : f32
|
||||
// CHECK: linalg.yield %[[DIM1]] : f32
|
||||
// CHECK: }
|
||||
// CHECK: %[[RESULT:.*]] = memref.tensor_load %[[RESULT_MEMREF]] : memref<4xf32>
|
||||
// CHECK: %[[RESULT:.*]] = bufferization.to_tensor %[[RESULT_MEMREF]] : memref<4xf32>
|
||||
// CHECK: return %[[RESULT]] : tensor<4xf32>
|
||||
func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> {
|
||||
%0 = linalg.generic {
|
||||
|
@ -45,7 +46,7 @@ func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> {
|
|||
// CHECK: #map = affine_map<(d0) -> (d0)>
|
||||
// CHECK-LABEL: func @init_tensor(
|
||||
// CHECK-SAME: %[[IN:.*]]: tensor<?xf32>, %[[SIZE:.*]]: index)
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[IN]] : memref<?xf32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : memref<?xf32>
|
||||
// CHECK: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) : memref<?xf32>
|
||||
// CHECK: linalg.generic
|
||||
// CHECK-SAME: ins(%[[MEMREF]] : memref<?xf32>)
|
||||
|
@ -100,7 +101,7 @@ func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
|
|||
// CHECK-SAME: %[[ARG:.*]]: tensor<?x?xf32>
|
||||
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[MEMREF_ARG:.*]] = memref.buffer_cast %[[ARG]] : memref<?x?xf32>
|
||||
// CHECK: %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : memref<?x?xf32>
|
||||
// CHECK: %[[DIM0:.*]] = tensor.dim %[[ARG]], %[[C0]] : tensor<?x?xf32>
|
||||
// CHECK: %[[DIM1:.*]] = tensor.dim %[[ARG]], %[[C1]] : tensor<?x?xf32>
|
||||
// CHECK: %[[RESULT0:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
|
||||
|
@ -139,8 +140,8 @@ func @dynamic_results(%arg0: tensor<?x?xf32>)
|
|||
// CHECK-LABEL: func @generic_with_init_tensor(
|
||||
// CHECK-SAME: %[[ARG0_TENSOR:.*]]: tensor<2x3x4xvector<3x4xi4>>,
|
||||
// CHECK-SAME: %[[ARG1_TENSOR:.*]]: tensor<3x2xf32>) -> tensor<3x2xf32> {
|
||||
// CHECK-DAG: %[[ARG0_MEMREF:.*]] = memref.buffer_cast %[[ARG0_TENSOR]] : memref<2x3x4xvector<3x4xi4>>
|
||||
// CHECK-DAG: %[[ARG1_MEMREF:.*]] = memref.buffer_cast %[[ARG1_TENSOR]] : memref<3x2xf32>
|
||||
// CHECK-DAG: %[[ARG0_MEMREF:.*]] = bufferization.to_memref %[[ARG0_TENSOR]] : memref<2x3x4xvector<3x4xi4>>
|
||||
// CHECK-DAG: %[[ARG1_MEMREF:.*]] = bufferization.to_memref %[[ARG1_TENSOR]] : memref<3x2xf32>
|
||||
// CHECK: %[[INIT_BUFFER:.*]] = memref.alloc() : memref<3x2xf32>
|
||||
// CHECK: linalg.copy(%[[ARG1_MEMREF]], %[[INIT_BUFFER]]) : memref<3x2xf32>, memref<3x2xf32>
|
||||
// CHECK: linalg.generic
|
||||
|
@ -169,7 +170,7 @@ func private @make_index() -> index
|
|||
// CHECK-LABEL: func @bufferize_slice(
|
||||
// CHECK-SAME: %[[T:[0-9a-z]*]]: tensor<?x?xf32>
|
||||
func @bufferize_slice(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?xf32>) {
|
||||
// CHECK: %[[M:.*]] = memref.buffer_cast %[[T]] : memref<?x?xf32>
|
||||
// CHECK: %[[M:.*]] = bufferization.to_memref %[[T]] : memref<?x?xf32>
|
||||
|
||||
// CHECK: %[[IDX:.*]] = call @make_index() : () -> index
|
||||
%i0 = call @make_index() : () -> index
|
||||
|
@ -178,14 +179,14 @@ func @bufferize_slice(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?xf32>
|
|||
// CHECK-NEXT: %[[SM0:.*]] = memref.subview %[[M]][0, 0] [2, 3] [1, 1]
|
||||
// CHECK-SAME: memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
|
||||
// CHECK-NEXT: linalg.copy(%[[SM0]], %[[A0]]) : memref<2x3xf32, #[[$MAP0]]>, memref<2x3xf32>
|
||||
// CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[A0]] : memref<2x3xf32>
|
||||
// CHECK-NEXT: %[[RT0:.*]] = bufferization.to_tensor %[[A0]] : memref<2x3xf32>
|
||||
%st0 = tensor.extract_slice %t[0, 0][2, 3][1, 1] : tensor<?x?xf32> to tensor<2x3xf32>
|
||||
|
||||
// CHECK-NEXT: %[[A1:.*]] = memref.alloc(%[[IDX]]) : memref<2x?xf32>
|
||||
// CHECK-NEXT: %[[SM1:.*]] = memref.subview %[[M]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
|
||||
// CHECK-SAME: memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
|
||||
// CHECK-NEXT: linalg.copy(%[[SM1]], %[[A1]]) : memref<2x?xf32, #[[$MAP1]]>, memref<2x?xf32>
|
||||
// CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[A1]] : memref<2x?xf32>
|
||||
// CHECK-NEXT: %[[RT1:.*]] = bufferization.to_tensor %[[A1]] : memref<2x?xf32>
|
||||
%st1 = tensor.extract_slice %t[0, %i0][2, %i0][1, 2] : tensor<?x?xf32> to tensor<2x?xf32>
|
||||
|
||||
// CHECK-NEXT: return %[[RT0]], %[[RT1]]
|
||||
|
@ -205,9 +206,9 @@ func private @make_index() -> index
|
|||
// CHECK-SAME: %[[ST1:[0-9a-z]*]]: tensor<2x?xf32>
|
||||
func @bufferize_insert_slice(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1 : tensor<2x?xf32>) ->
|
||||
(tensor<?x?xf32>, tensor<?x?xf32>) {
|
||||
// CHECK-DAG: %[[M:.*]] = memref.buffer_cast %[[T]] : memref<?x?xf32>
|
||||
// CHECK-DAG: %[[SM0:.*]] = memref.buffer_cast %[[ST0]] : memref<2x3xf32>
|
||||
// CHECK-DAG: %[[SM1:.*]] = memref.buffer_cast %[[ST1]] : memref<2x?xf32>
|
||||
// CHECK-DAG: %[[M:.*]] = bufferization.to_memref %[[T]] : memref<?x?xf32>
|
||||
// CHECK-DAG: %[[SM0:.*]] = bufferization.to_memref %[[ST0]] : memref<2x3xf32>
|
||||
// CHECK-DAG: %[[SM1:.*]] = bufferization.to_memref %[[ST1]] : memref<2x?xf32>
|
||||
|
||||
%c0 = arith.constant 0 : index
|
||||
%c1 = arith.constant 1 : index
|
||||
|
@ -224,7 +225,7 @@ func @bufferize_insert_slice(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1
|
|||
// CHECK-NEXT: %[[SUBVIEW0:.*]] = memref.subview %[[M_COPY0]][0, 0] [2, 3] [1, 1]
|
||||
// CHECK-SAME: memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
|
||||
// CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]>
|
||||
// CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[M_COPY0]] : memref<?x?xf32>
|
||||
// CHECK-NEXT: %[[RT0:.*]] = bufferization.to_tensor %[[M_COPY0]] : memref<?x?xf32>
|
||||
%t0 = tensor.insert_slice %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>
|
||||
|
||||
// CHECK-NEXT: %[[M_COPY1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
|
||||
|
@ -232,7 +233,7 @@ func @bufferize_insert_slice(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1
|
|||
// CHECK-NEXT: %[[SUBVIEW1:.*]] = memref.subview %[[M_COPY1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
|
||||
// CHECK-SAME: memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
|
||||
// CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]>
|
||||
// CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[M_COPY1]] : memref<?x?xf32>
|
||||
// CHECK-NEXT: %[[RT1:.*]] = bufferization.to_tensor %[[M_COPY1]] : memref<?x?xf32>
|
||||
%t1 = tensor.insert_slice %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor<?x?xf32>
|
||||
|
||||
// CHECK: return %[[RT0]], %[[RT1]]
|
||||
|
@ -245,9 +246,9 @@ func @bufferize_insert_slice(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1
|
|||
// CHECK-SAME: %[[IN:.*]]: tensor<?xf32>
|
||||
func @bufferize_fill(%arg0: tensor<?xf32>) -> tensor<?xf32> {
|
||||
%c0 = arith.constant 0.0 : f32
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[IN]] : memref<?xf32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : memref<?xf32>
|
||||
// CHECK: linalg.fill(%cst, %[[MEMREF]]) : f32, memref<?xf32>
|
||||
// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF]] : memref<?xf32>
|
||||
// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[MEMREF]] : memref<?xf32>
|
||||
// CHECK: return %[[TENSOR]]
|
||||
%0 = linalg.fill(%c0, %arg0) : f32, tensor<?xf32> -> tensor<?xf32>
|
||||
return %0 : tensor<?xf32>
@ -262,10 +263,10 @@ func @bufferize_tensor_collapse_shape(%arg0: tensor<4x5xf32>) -> tensor<20xf32>
|
|||
tensor<4x5xf32> into tensor<20xf32>
|
||||
return %out : tensor<20xf32>
|
||||
}
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[IN]] : memref<4x5xf32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[IN]] : memref<4x5xf32>
|
||||
// CHECK: %[[RESHAPE:.*]] = memref.collapse_shape %[[MEMREF]] {{\[}}[0, 1]]
|
||||
// CHECK-SAME: : memref<4x5xf32> into memref<20xf32>
|
||||
// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[RESHAPE]] : memref<20xf32>
|
||||
// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[RESHAPE]] : memref<20xf32>
|
||||
// CHECK: return %[[TENSOR]]
// -----
@ -287,7 +288,7 @@ func @pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tens
|
|||
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
|
||||
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
|
||||
// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[IN_MEMREF:.*]] = memref.buffer_cast %[[IN]] : memref<4x?x2x?xf32>
|
||||
// CHECK: %[[IN_MEMREF:.*]] = bufferization.to_memref %[[IN]] : memref<4x?x2x?xf32>
|
||||
// CHECK: %[[DIM1:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<4x?x2x?xf32>
|
||||
// CHECK: %[[OUT_DIM2:.*]] = arith.addi %[[OFFSET]], %[[C2]] : index
|
||||
// CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
@ -298,7 +299,7 @@ func @pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tens
|
|||
// CHECK: linalg.copy(%[[FILLED]], %[[OUT]]) : memref<4x?x?x?xf32>, memref<4x?x?x?xf32>
|
||||
// CHECK: %[[INTERIOR:.*]] = memref.subview %[[OUT]][0, 0, %[[OFFSET]], 0] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : memref<4x?x?x?xf32> to memref<4x?x2x?xf32, #map>
|
||||
// CHECK: linalg.copy(%[[IN_MEMREF]], %[[INTERIOR]]) : memref<4x?x2x?xf32>, memref<4x?x2x?xf32, #map>
|
||||
// CHECK: %[[OUT_TENSOR:.*]] = memref.tensor_load %[[OUT]] : memref<4x?x?x?xf32>
|
||||
// CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[OUT]] : memref<4x?x?x?xf32>
|
||||
// CHECK: return %[[OUT_TENSOR]] : tensor<4x?x?x?xf32>
|
||||
// CHECK: }
@ -328,6 +329,6 @@ func @bufferize_dot(%in: tensor<4xf32>, %out: tensor<f32>) -> tensor<f32> {
|
|||
return %dot : tensor<f32>
|
||||
// CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>)
|
||||
// CHECK-SAME: outs(%[[OUT:.*]] : memref<f32>)
|
||||
// CHECK: %[[OUT_TENSOR:.*]] = memref.tensor_load %[[OUT]] : memref<f32>
|
||||
// CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[OUT]] : memref<f32>
|
||||
// CHECK: return %[[OUT_TENSOR]]
|
||||
}
@ -5,12 +5,12 @@
|
|||
// CHECK-LABEL: func @pad_tensor_with_memrefs
|
||||
func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3xf32> {
|
||||
%cst = arith.constant 0.000000e+00 : f32
|
||||
%0 = memref.tensor_load %arg0 : memref<1x28x28x1xf32>
|
||||
%0 = bufferization.to_tensor %arg0 : memref<1x28x28x1xf32>
|
||||
%1 = linalg.pad_tensor %0 low[1, 1, 1, 2] high[0, 2, 2, 0] {
|
||||
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): // no predecessors
|
||||
linalg.yield %cst : f32
|
||||
} : tensor<1x28x28x1xf32> to tensor<2x31x31x3xf32>
|
||||
%2 = memref.buffer_cast %1 : memref<2x31x31x3xf32>
|
||||
%2 = bufferization.to_memref %1 : memref<2x31x31x3xf32>
|
||||
return %2 : memref<2x31x31x3xf32>
|
||||
}
@ -1,99 +1,5 @@
|
|||
// RUN: mlir-opt %s -canonicalize --split-input-file -allow-unregistered-dialect | FileCheck %s
// Test case: Basic folding of memref.tensor_load(memref.buffer_cast(t)) -> t
|
||||
// CHECK-LABEL: func @tensor_load_of_buffer_cast(
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<?xf32>) -> tensor<?xf32> {
|
||||
// CHECK: return %[[TENSOR]]
|
||||
func @tensor_load_of_buffer_cast(%arg0: tensor<?xf32>) -> tensor<?xf32> {
|
||||
%0 = memref.buffer_cast %arg0 : memref<?xf32>
|
||||
%1 = memref.tensor_load %0 : memref<?xf32>
|
||||
return %1 : tensor<?xf32>
|
||||
}
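// Illustrative sketch, not part of this commit's diff: with the ops moved to the
// bufferization dialect, the same round-trip fold is expected to be spelled with
// bufferization.to_memref / bufferization.to_tensor and still fold to the original
// tensor value (the function name below is made up for illustration):
//
//   func @to_tensor_of_to_memref(%arg0: tensor<?xf32>) -> tensor<?xf32> {
//     %0 = bufferization.to_memref %arg0 : memref<?xf32>
//     %1 = bufferization.to_tensor %0 : memref<?xf32>
//     return %1 : tensor<?xf32>  // folds to: return %arg0
//   }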
// -----
// Test case: Basic folding of memref.buffer_cast(memref.tensor_load(m)) -> m
|
||||
// CHECK-LABEL: func @buffer_cast_of_tensor_load(
|
||||
// CHECK-SAME: %[[MEMREF:.*]]: memref<?xf32>) -> memref<?xf32> {
|
||||
// CHECK: return %[[MEMREF]]
|
||||
func @buffer_cast_of_tensor_load(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
%0 = memref.tensor_load %arg0 : memref<?xf32>
|
||||
%1 = memref.buffer_cast %0 : memref<?xf32>
|
||||
return %1 : memref<?xf32>
|
||||
}
// -----
// Test case: If the memrefs are not the same type, don't fold them.
|
||||
// Test case: If the memrefs are not cast-compatible (e.g. different address space),
|
||||
// don't canonicalize them either.
|
||||
// CHECK-LABEL: func @no_fold_buffer_cast_of_tensor_load(
|
||||
// CHECK-SAME: %[[MEMREF_ADDRSPACE2:.*]]: memref<?xf32, 2>)
|
||||
// CHECK-SAME: -> memref<?xf32, 7> {
|
||||
// CHECK: %[[TENSOR:.*]] = memref.tensor_load
|
||||
// CHECK-SAME: %[[MEMREF_ADDRSPACE2]] : memref<?xf32, 2>
|
||||
// CHECK: %[[MEMREF_ADDRSPACE7:.*]] = memref.buffer_cast
|
||||
// CHECK-SAME: %[[TENSOR]] : memref<?xf32, 7>
|
||||
// CHECK: return %[[MEMREF_ADDRSPACE7]]
|
||||
func @no_fold_buffer_cast_of_tensor_load(%arg0: memref<?xf32, 2>) -> memref<?xf32, 7> {
|
||||
%0 = memref.tensor_load %arg0 : memref<?xf32, 2>
|
||||
%1 = memref.buffer_cast %0 : memref<?xf32, 7>
|
||||
return %1 : memref<?xf32, 7>
|
||||
}
// -----
// CHECK-DAG: #[[$OFF_3:[a-z0-9]+]] = affine_map<(d0) -> (d0 + 3)>
|
||||
// CHECK-DAG: #[[$OFF_UNK:[a-z0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// Test case: If the memrefs are definitely cast-compatible, canonicalize to
|
||||
// cast.
|
||||
// CHECK-LABEL: func @canonicalize_buffer_cast_of_tensor_load(
|
||||
// CHECK-SAME: %[[M:.*]]: memref<?xf32, #[[$OFF_3]]>)
|
||||
// CHECK-SAME: -> memref<?xf32, #[[$OFF_UNK]]> {
|
||||
// CHECK-NOT: memref.tensor_load
|
||||
// CHECK-NOT: memref.buffer_cast
|
||||
// CHECK: %[[R:.*]] = memref.cast %[[M]]
|
||||
// CHECK-SAME: memref<?xf32, #[[$OFF_3]]> to memref<?xf32, #[[$OFF_UNK]]>
|
||||
// CHECK: return %[[R]]
|
||||
func @canonicalize_buffer_cast_of_tensor_load(
|
||||
%arg0: memref<?xf32, offset: 3, strides: [1]>)
|
||||
-> memref<?xf32, offset: ?, strides: [1]>
|
||||
{
|
||||
%0 = memref.tensor_load %arg0 : memref<?xf32, offset: 3, strides: [1]>
|
||||
%1 = memref.buffer_cast %0 : memref<?xf32, offset: ?, strides: [1]>
|
||||
return %1 : memref<?xf32, offset: ?, strides: [1]>
|
||||
}
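// Illustrative sketch, not part of this commit's diff: the definitely
// cast-compatible case above, spelled with the renamed ops; as the CHECK lines
// describe, the pair is expected to canonicalize to a single memref.cast:
//
//   %0 = bufferization.to_tensor %arg0 : memref<?xf32, offset: 3, strides: [1]>
//   %1 = bufferization.to_memref %0 : memref<?xf32, offset: ?, strides: [1]>
//   // canonicalizes to:
//   %r = memref.cast %arg0 : memref<?xf32, offset: 3, strides: [1]> to memref<?xf32, offset: ?, strides: [1]>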
// -----
// CHECK-DAG: #[[$OFF_UNK:[a-z0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0)>
|
||||
// CHECK-DAG: #[[$OFF_3:[a-z0-9]+]] = affine_map<(d0) -> (d0 + 3)>
// Test case: If the memrefs are potentially cast-compatible, canonicalize to
|
||||
// copy.
|
||||
// CHECK-LABEL: func @canonicalize_buffer_cast_of_tensor_load_to_copy(
|
||||
// CHECK-SAME: %[[M:.*]]: memref<?xf32, #[[$OFF_UNK]]>)
|
||||
// CHECK-SAME: -> memref<?xf32, #[[$OFF_3]]> {
|
||||
// CHECK-NOT: memref.tensor_load
|
||||
// CHECK-NOT: memref.buffer_cast
|
||||
// CHECK: %[[C0:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[DIM:.*]] = memref.dim %[[M]], %[[C0]] : memref<?xf32, #[[$OFF_UNK]]>
|
||||
// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM]]) : memref<?xf32, #[[$OFF_3]]>
|
||||
// CHECK: memref.copy %[[M]], %[[ALLOC]]
|
||||
// CHECK-SAME: memref<?xf32, #[[$OFF_UNK]]> to memref<?xf32, #[[$OFF_3]]>
|
||||
// CHECK: return %[[ALLOC]]
|
||||
func @canonicalize_buffer_cast_of_tensor_load_to_copy(
|
||||
%arg0: memref<?xf32, offset: ?, strides: [1]>)
|
||||
-> memref<?xf32, offset: 3, strides: [1]>
|
||||
{
|
||||
%0 = memref.tensor_load %arg0 : memref<?xf32, offset: ?, strides: [1]>
|
||||
%1 = memref.buffer_cast %0 : memref<?xf32, offset: 3, strides: [1]>
|
||||
return %1 : memref<?xf32, offset: 3, strides: [1]>
|
||||
}
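// Illustrative sketch, not part of this commit's diff: the potentially
// cast-compatible case above with the renamed ops; as the CHECK lines describe,
// canonicalization is expected to materialize an alloc plus a memref.copy rather
// than a cast, because the offsets only agree dynamically:
//
//   %0 = bufferization.to_tensor %arg0 : memref<?xf32, offset: ?, strides: [1]>
//   %1 = bufferization.to_memref %0 : memref<?xf32, offset: 3, strides: [1]>
//   // canonicalizes to (roughly):
//   %dim = memref.dim %arg0, %c0 : memref<?xf32, offset: ?, strides: [1]>
//   %alloc = memref.alloc(%dim) : memref<?xf32, offset: 3, strides: [1]>
//   memref.copy %arg0, %alloc : memref<?xf32, offset: ?, strides: [1]> to memref<?xf32, offset: 3, strides: [1]>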
// -----
// CHECK-LABEL: func @subview_of_memcast
|
||||
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: memref<4x6x16x32xi8>
|
||||
// CHECK: %[[S:.+]] = memref.subview %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : memref<4x6x16x32xi8> to memref<16x32xi8, #{{.*}}>
@ -216,107 +122,6 @@ func @multiple_reducing_dims_all_dynamic(%arg0 : memref<?x?x?xf32, offset: ?, st
|
|||
// CHECK-SAME: : memref<1x?xf32, #[[MAP1]]> to memref<?xf32, #[[MAP0]]>
// -----
// CHECK-LABEL: @clone_before_dealloc
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
// CHECK-NEXT: return %[[ARG]]
|
||||
%0 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
return %0 : memref<?xf32>
|
||||
}
// -----
// CHECK-LABEL: @clone_before_dealloc
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
func @clone_before_dealloc(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
// CHECK-NEXT: "use"(%arg0)
|
||||
// CHECK-NEXT: return %[[ARG]]
|
||||
%0 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
"use"(%0) : (memref<?xf32>) -> ()
|
||||
memref.dealloc %0 : memref<?xf32>
|
||||
return %arg0 : memref<?xf32>
|
||||
}
// -----
// CHECK-LABEL: @clone_after_cast
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
func @clone_after_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
|
||||
// CHECK-NEXT: memref.clone %[[ARG]] : memref<?xf32> to memref<32xf32>
|
||||
// CHECK-NOT: memref.cast
|
||||
%0 = memref.cast %arg0 : memref<?xf32> to memref<32xf32>
|
||||
%1 = memref.clone %0 : memref<32xf32> to memref<32xf32>
|
||||
return %1 : memref<32xf32>
|
||||
}
// -----
// CHECK-LABEL: @clone_and_cast
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
func @clone_and_cast(%arg0: memref<?xf32>) -> memref<32xf32> {
|
||||
// CHECK-NEXT: %[[RES:.*]] = memref.cast %[[ARG]] : memref<?xf32> to memref<32xf32>
|
||||
%0 = memref.clone %arg0 : memref<?xf32> to memref<32xf32>
|
||||
// CHECK-NEXT: return %[[RES]]
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
return %0 : memref<32xf32>
|
||||
}
// -----
// CHECK-LABEL: @alias_is_freed
|
||||
func @alias_is_freed(%arg0 : memref<?xf32>) {
|
||||
// CHECK: memref.clone
|
||||
// CHECK: memref.dealloc
|
||||
// CHECK: memref.dealloc
|
||||
%0 = memref.cast %arg0 : memref<?xf32> to memref<32xf32>
|
||||
%1 = memref.clone %0 : memref<32xf32> to memref<32xf32>
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
"use"(%1) : (memref<32xf32>) -> ()
|
||||
memref.dealloc %1 : memref<32xf32>
|
||||
return
|
||||
}
// -----
// Verify SimplifyClones skips clones with multiple deallocations.
|
||||
// CHECK-LABEL: @clone_multiple_dealloc_of_source
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
func @clone_multiple_dealloc_of_source(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
// CHECK-NEXT: %[[RES:.*]] = memref.clone %[[ARG]]
|
||||
// CHECK: memref.dealloc %[[ARG]]
|
||||
// CHECK: memref.dealloc %[[ARG]]
|
||||
// CHECK: return %[[RES]]
|
||||
%0 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
"if_else"() ({
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
}, {
|
||||
memref.dealloc %arg0 : memref<?xf32>
|
||||
}) : () -> ()
|
||||
return %0 : memref<?xf32>
|
||||
}
// -----
// CHECK-LABEL: @clone_multiple_dealloc_of_clone
|
||||
// CHECK-SAME: %[[ARG:.*]]: memref<?xf32>
|
||||
func @clone_multiple_dealloc_of_clone(%arg0: memref<?xf32>) -> memref<?xf32> {
|
||||
// CHECK-NEXT: %[[CLONE:.*]] = memref.clone %[[ARG]]
|
||||
// CHECK: memref.dealloc %[[CLONE]]
|
||||
// CHECK: memref.dealloc %[[CLONE]]
|
||||
// CHECK: return %[[ARG]]
|
||||
%0 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
|
||||
"use"(%0) : (memref<?xf32>) -> ()
|
||||
"if_else"() ({
|
||||
memref.dealloc %0 : memref<?xf32>
|
||||
}, {
|
||||
memref.dealloc %0 : memref<?xf32>
|
||||
}) : () -> ()
|
||||
return %arg0 : memref<?xf32>
|
||||
}
// -----
// CHECK-LABEL: func @dim_of_sized_view
@ -343,38 +148,6 @@ func @no_fold_of_store(%arg : memref<32xi8>, %holder: memref<memref<?xi8>>) {
// -----
// Test case: Folding of memref.load(memref.buffer_cast(%v, %idxs))
|
||||
// -> tensor.extract(%v, %idx)
|
||||
// CHECK-LABEL: func @load_from_buffer_cast(
|
||||
// CHECK-SAME: %[[IDX0:[0-9a-z]+]]: index, %[[IDX1:[0-9a-z]+]]: index
|
||||
// CHECK-SAME: %[[TENSOR:[0-9a-z]+]]: tensor<?x?xf32>
|
||||
// CHECK: %[[RES:.*]] = tensor.extract %[[TENSOR]][%[[IDX0]], %[[IDX1]]]
|
||||
// CHECK-NOT: memref.load
|
||||
// CHECK: return %[[RES]] : f32
|
||||
func @load_from_buffer_cast(%arg0: index, %arg1: index, %arg2: tensor<?x?xf32>) -> f32 {
|
||||
%0 = memref.buffer_cast %arg2 : memref<?x?xf32>
|
||||
%1 = memref.load %0[%arg0, %arg1] : memref<?x?xf32>
|
||||
return %1 : f32
|
||||
}
// -----
// Test case: Basic folding of tensor.dim(memref.tensor_load(m)) -> memref.dim(m).
|
||||
// CHECK-LABEL: func @dim_of_tensor_load(
|
||||
// CHECK-SAME: %[[MEMREF:[0-9a-z]*]]: memref<?xf32>
|
||||
// CHECK: %[[C0:.*]] = arith.constant 0
|
||||
// CHECK: %[[D:.*]] = memref.dim %[[MEMREF]], %[[C0]]
|
||||
// CHECK: return %[[D]] : index
|
||||
func @dim_of_tensor_load(%arg0: memref<?xf32>) -> index {
|
||||
%c0 = arith.constant 0 : index
|
||||
%0 = memref.tensor_load %arg0 : memref<?xf32>
|
||||
%1 = tensor.dim %0, %c0 : tensor<?xf32>
|
||||
return %1 : index
|
||||
}
// -----
// Test case: Folding of memref.dim(memref.alloca(%size), %idx) -> %size
|
||||
// CHECK-LABEL: func @dim_of_alloca(
|
||||
// CHECK-SAME: %[[SIZE:[0-9a-z]+]]: index
@ -445,20 +218,6 @@ func @dim_of_memref_reshape_i32(%arg0: memref<*xf32>, %arg1: memref<?xi32>)
// -----
// CHECK-LABEL: func @tensor_cast_to_memref
|
||||
// CHECK-SAME: %[[ARG0:.+]]: tensor<4x6x16x32xi8>
|
||||
// CHECK: %[[M:.+]] = memref.buffer_cast %[[ARG0]] : memref<4x6x16x32xi8>
|
||||
// CHECK: %[[M1:.+]] = memref.cast %[[M]] : memref<4x6x16x32xi8> to memref<?x?x16x32xi8>
|
||||
// CHECK: return %[[M1]] : memref<?x?x16x32xi8>
|
||||
func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) ->
|
||||
memref<?x?x16x32xi8> {
|
||||
%0 = tensor.cast %arg0 : tensor<4x6x16x32xi8> to tensor<?x?x16x32xi8>
|
||||
%1 = memref.buffer_cast %0 : memref<?x?x16x32xi8>
|
||||
return %1 : memref<?x?x16x32xi8>
|
||||
}
// -----
// CHECK-LABEL: func @alloc_const_fold
|
||||
func @alloc_const_fold() -> memref<?xf32> {
|
||||
// CHECK-NEXT: %0 = memref.alloc() : memref<4xf32>
@ -6,13 +6,6 @@
|
|||
// CHECK-DAG: #[[$strided2DOFF0:.*]] = affine_map<(d0, d1)[s0] -> (d0 * s0 + d1)>
|
||||
// CHECK-DAG: #[[$strided3DOFF0:.*]] = affine_map<(d0, d1, d2)[s0, s1] -> (d0 * s0 + d1 * s1 + d2)>
// CHECK-LABEL: test_buffer_cast
|
||||
func @test_buffer_cast(%arg0: tensor<?xi64>, %arg1: tensor<*xi64>) -> (memref<?xi64, affine_map<(d0) -> (d0 + 7)>>, memref<*xi64, 1>) {
|
||||
%0 = memref.buffer_cast %arg0 : memref<?xi64, affine_map<(d0) -> (d0 + 7)>>
|
||||
%1 = memref.buffer_cast %arg1 : memref<*xi64, 1>
|
||||
return %0, %1 : memref<?xi64, affine_map<(d0) -> (d0 + 7)>>, memref<*xi64, 1>
|
||||
}
// CHECK-LABEL: func @memref_reinterpret_cast
|
||||
func @memref_reinterpret_cast(%in: memref<?xf32>)
|
||||
-> memref<10x?xf32, offset: ?, strides: [?, 1]> {
@ -62,15 +55,6 @@ func @write_global_memref() {
|
|||
// CHECK-LABEL: func @read_global_memref
|
||||
func @read_global_memref() {
|
||||
%0 = memref.get_global @memref0 : memref<2xf32>
|
||||
%1 = memref.tensor_load %0 : memref<2xf32>
|
||||
return
|
||||
}
// CHECK-LABEL: func @memref_clone
|
||||
func @memref_clone() {
|
||||
%0 = memref.alloc() : memref<2xf32>
|
||||
%1 = memref.cast %0 : memref<2xf32> to memref<*xf32>
|
||||
%2 = memref.clone %1 : memref<*xf32> to memref<*xf32>
|
||||
return
|
||||
}
@ -4,14 +4,14 @@
|
|||
// CHECK-SAME: %[[PRED:.*]]: i1,
|
||||
// CHECK-SAME: %[[TRUE_TENSOR:.*]]: tensor<?xf32>,
|
||||
// CHECK-SAME: %[[FALSE_TENSOR:.*]]: tensor<?xf32>) -> tensor<?xf32> {
|
||||
// CHECK: %[[TRUE_MEMREF:.*]] = memref.buffer_cast %[[TRUE_TENSOR]] : memref<?xf32>
|
||||
// CHECK: %[[FALSE_MEMREF:.*]] = memref.buffer_cast %[[FALSE_TENSOR]] : memref<?xf32>
|
||||
// CHECK: %[[TRUE_MEMREF:.*]] = bufferization.to_memref %[[TRUE_TENSOR]] : memref<?xf32>
|
||||
// CHECK: %[[FALSE_MEMREF:.*]] = bufferization.to_memref %[[FALSE_TENSOR]] : memref<?xf32>
|
||||
// CHECK: %[[RESULT_MEMREF:.*]] = scf.if %[[PRED]] -> (memref<?xf32>) {
|
||||
// CHECK: scf.yield %[[TRUE_MEMREF]] : memref<?xf32>
|
||||
// CHECK: } else {
|
||||
// CHECK: scf.yield %[[FALSE_MEMREF]] : memref<?xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[RESULT_TENSOR:.*]] = memref.tensor_load %[[RESULT_MEMREF:.*]] : memref<?xf32>
|
||||
// CHECK: %[[RESULT_TENSOR:.*]] = bufferization.to_tensor %[[RESULT_MEMREF:.*]] : memref<?xf32>
|
||||
// CHECK: return %[[RESULT_TENSOR]] : tensor<?xf32>
|
||||
// CHECK: }
|
||||
func @if(%pred: i1, %true_val: tensor<?xf32>, %false_val: tensor<?xf32>) -> tensor<?xf32> {
@ -27,11 +27,11 @@ func @if(%pred: i1, %true_val: tensor<?xf32>, %false_val: tensor<?xf32>) -> tens
|
|||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<f32>,
|
||||
// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index,
|
||||
// CHECK-SAME: %[[STEP:.*]]: index) -> tensor<f32> {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<f32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
|
||||
// CHECK: %[[RESULT_MEMREF:.*]] = scf.for %[[VAL_6:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF]]) -> (memref<f32>) {
|
||||
// CHECK: scf.yield %[[ITER]] : memref<f32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_8:.*]] = memref.tensor_load %[[VAL_9:.*]] : memref<f32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_tensor %[[VAL_9:.*]] : memref<f32>
|
||||
// CHECK: return %[[VAL_8]] : tensor<f32>
|
||||
// CHECK: }
|
||||
func @for(%arg0: tensor<f32>, %lb: index, %ub: index, %step: index) -> tensor<f32> {
@ -60,14 +60,14 @@ func @if_correct_recursive_legalization_behavior(%pred: i1, %tensor: tensor<f32>
|
|||
// CHECK-LABEL: func @for_correct_recursive_legalization_behavior(
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<f32>,
|
||||
// CHECK-SAME: %[[INDEX:.*]]: index) -> tensor<f32> {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<f32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
|
||||
// CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF]]) -> (memref<f32>) {
|
||||
// CHECK: %[[TENSOR_ITER:.*]] = memref.tensor_load %[[MEMREF_ITER]] : memref<f32>
|
||||
// CHECK: %[[TENSOR_ITER:.*]] = bufferization.to_tensor %[[MEMREF_ITER]] : memref<f32>
|
||||
// CHECK: %[[TENSOR_MUNGED:.*]] = "test.munge_tensor"(%[[TENSOR_ITER]]) : (tensor<f32>) -> tensor<f32>
|
||||
// CHECK: %[[MEMREF_MUNGED:.*]] = memref.buffer_cast %[[TENSOR_MUNGED]] : memref<f32>
|
||||
// CHECK: %[[MEMREF_MUNGED:.*]] = bufferization.to_memref %[[TENSOR_MUNGED]] : memref<f32>
|
||||
// CHECK: scf.yield %[[MEMREF_MUNGED]] : memref<f32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[RESULT:.*]] : memref<f32>
|
||||
// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT:.*]] : memref<f32>
|
||||
// CHECK: return %[[TENSOR]] : tensor<f32>
|
||||
// CHECK: }
|
||||
func @for_correct_recursive_legalization_behavior(%arg0: tensor<f32>, %index: index) -> tensor<f32> {
@ -80,12 +80,12 @@ func @for_correct_recursive_legalization_behavior(%arg0: tensor<f32>, %index: in
// CHECK-LABEL: func @bufferize_while(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: tensor<f32>
|
||||
// CHECK: %[[M:.*]] = memref.buffer_cast %[[ARG2]] : memref<f32>
|
||||
// CHECK: %[[M:.*]] = bufferization.to_memref %[[ARG2]] : memref<f32>
|
||||
// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %{{.*}} = %[[M]]) : (i64, memref<f32>) -> (i64, i64, memref<f32>)
|
||||
// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, memref<f32>
|
||||
// CHECK: ^bb0(%{{.*}}: i64, %{{.*}}: i64, %{{.*}}: memref<f32>):
|
||||
// CHECK: scf.yield %{{.*}}, %{{.*}} : i64, memref<f32>
|
||||
// CHECK: %[[RES2:.*]] = memref.tensor_load %[[RES1]]#2 : memref<f32>
|
||||
// CHECK: %[[RES2:.*]] = bufferization.to_tensor %[[RES1]]#2 : memref<f32>
|
||||
// CHECK: return %[[RES1]]#1, %[[RES2]] : i64, tensor<f32>
|
||||
func @bufferize_while(%arg0: i64, %arg1: i64, %arg2: tensor<f32>) -> (i64, tensor<f32>) {
|
||||
%c2_i64 = arith.constant 2 : i64
@ -564,12 +564,12 @@ func @last_value(%t0: tensor<128x128xf32>, %t1: tensor<128x128xf32>,
|
|||
%lb : index, %ub : index, %step : index)
|
||||
-> (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32>)
|
||||
{
|
||||
// CHECK-NEXT: %[[M1:.*]] = memref.buffer_cast %[[T1]] : memref<128x128xf32>
|
||||
// CHECK-NEXT: %[[M1:.*]] = bufferization.to_memref %[[T1]] : memref<128x128xf32>
|
||||
// CHECK-NEXT: %[[FOR_RES:.*]] = scf.for {{.*}} iter_args(%[[BBARG_T2:.*]] = %[[T2]]) -> (tensor<128x128xf32>) {
|
||||
%0:3 = scf.for %arg0 = %lb to %ub step %step iter_args(%arg1 = %t0, %arg2 = %t1, %arg3 = %t2)
|
||||
-> (tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32>)
|
||||
{
|
||||
%m1 = memref.buffer_cast %arg2 : memref<128x128xf32>
|
||||
%m1 = bufferization.to_memref %arg2 : memref<128x128xf32>
// CHECK-NEXT: call @process(%[[M0]]) : (memref<128x128xf32>) -> ()
|
||||
call @process(%m0) : (memref<128x128xf32>) -> ()
@ -579,13 +579,13 @@ func @last_value(%t0: tensor<128x128xf32>, %t1: tensor<128x128xf32>,
// This does not hoist (fails the check that the bbArg has at most a single use).
|
||||
// CHECK-NEXT: %[[T:.*]] = call @process_tensor(%[[BBARG_T2]]) : (tensor<128x128xf32>) -> memref<128x128xf32>
|
||||
// CHECK-NEXT: %[[YIELD_T:.*]] = memref.tensor_load %[[T:.*]]
|
||||
// CHECK-NEXT: %[[YIELD_T:.*]] = bufferization.to_tensor %[[T:.*]]
|
||||
%m2 = call @process_tensor(%arg3): (tensor<128x128xf32>) -> memref<128x128xf32>
|
||||
%3 = memref.tensor_load %m2 : memref<128x128xf32>
|
||||
%3 = bufferization.to_tensor %m2 : memref<128x128xf32>
// All this stuff goes away, incrementally
|
||||
%1 = memref.tensor_load %m0 : memref<128x128xf32>
|
||||
%2 = memref.tensor_load %m1 : memref<128x128xf32>
|
||||
%1 = bufferization.to_tensor %m0 : memref<128x128xf32>
|
||||
%2 = bufferization.to_tensor %m1 : memref<128x128xf32>
// CHECK-NEXT: scf.yield %[[YIELD_T]] : tensor<128x128xf32>
|
||||
scf.yield %1, %2, %3 : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32>
@ -593,8 +593,8 @@ func @last_value(%t0: tensor<128x128xf32>, %t1: tensor<128x128xf32>,
|
|||
// CHECK-NEXT: }
|
||||
}
// CHECK-NEXT: %[[R0:.*]] = memref.tensor_load %[[M0]] : memref<128x128xf32>
|
||||
// CHECK-NEXT: %[[R1:.*]] = memref.tensor_load %[[M1]] : memref<128x128xf32>
|
||||
// CHECK-NEXT: %[[R0:.*]] = bufferization.to_tensor %[[M0]] : memref<128x128xf32>
|
||||
// CHECK-NEXT: %[[R1:.*]] = bufferization.to_tensor %[[M1]] : memref<128x128xf32>
|
||||
// CHECK-NEXT: return %[[R0]], %[[R1]], %[[FOR_RES]] : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32>
|
||||
return %0#0, %0#1, %0#2 : tensor<128x128xf32>, tensor<128x128xf32>, tensor<128x128xf32>
|
||||
}
@ -6,10 +6,10 @@
|
|||
// CHECK: %[[WTRUE:.*]] = shape.const_witness true
|
||||
// CHECK: %[[MEMREF:.*]] = shape.assuming %[[WTRUE]] -> (memref<2xf16>) {
|
||||
// CHECK: %[[TENSOR_VAL:.*]] = "test.source"() : () -> tensor<2xf16>
|
||||
// CHECK: %[[YIELDED_MEMREF:.*]] = memref.buffer_cast %[[TENSOR_VAL]] : memref<2xf16>
|
||||
// CHECK: %[[YIELDED_MEMREF:.*]] = bufferization.to_memref %[[TENSOR_VAL]] : memref<2xf16>
|
||||
// CHECK: shape.assuming_yield %[[YIELDED_MEMREF]] : memref<2xf16>
|
||||
// CHECK: }
|
||||
// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF:.*]] : memref<2xf16>
|
||||
// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[MEMREF:.*]] : memref<2xf16>
|
||||
// CHECK: "test.sink"(%[[TENSOR]]) : (tensor<2xf16>) -> ()
|
||||
// CHECK: return
|
||||
// CHECK: }
@ -46,7 +46,7 @@
|
|||
// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]]] : memref<13xi32>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<13xi32>
|
||||
// CHECK: %[[T:.*]] = bufferization.to_tensor %[[M]] : memref<13xi32>
|
||||
// CHECK: return %[[T]] : tensor<13xi32>
|
||||
func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32> {
|
||||
%0 = sparse_tensor.convert %arg0 : tensor<13xi32, #SparseVector> to tensor<13xi32>
@ -86,7 +86,7 @@ func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32>
|
|||
// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]]] : memref<?xi32>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<?xi32>
|
||||
// CHECK: %[[T:.*]] = bufferization.to_tensor %[[M]] : memref<?xi32>
|
||||
// CHECK: return %[[T]] : tensor<?xi32>
|
||||
func @sparse_convert_1d_dyn(%arg0: tensor<?xi32, #SparseVector>) -> tensor<?xi32> {
|
||||
%0 = sparse_tensor.convert %arg0 : tensor<?xi32, #SparseVector> to tensor<?xi32>
@ -130,7 +130,7 @@ func @sparse_convert_1d_dyn(%arg0: tensor<?xi32, #SparseVector>) -> tensor<?xi32
|
|||
// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]], %[[Iv1]]] : memref<2x4xf64>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<2x4xf64>
|
||||
// CHECK: %[[T:.*]] = bufferization.to_tensor %[[M]] : memref<2x4xf64>
|
||||
// CHECK: return %[[T]] : tensor<2x4xf64>
|
||||
func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64> {
|
||||
%0 = sparse_tensor.convert %arg0 : tensor<2x4xf64, #SparseMatrix> to tensor<2x4xf64>
@ -174,7 +174,7 @@ func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64
|
|||
// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]], %[[Iv1]]] : memref<?x4xf64>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<?x4xf64>
|
||||
// CHECK: %[[T:.*]] = bufferization.to_tensor %[[M]] : memref<?x4xf64>
|
||||
// CHECK: return %[[T]] : tensor<?x4xf64>
|
||||
func @sparse_convert_2d_dyn0(%arg0: tensor<?x4xf64, #SparseMatrix>) -> tensor<?x4xf64> {
|
||||
%0 = sparse_tensor.convert %arg0 : tensor<?x4xf64, #SparseMatrix> to tensor<?x4xf64>
@ -218,7 +218,7 @@ func @sparse_convert_2d_dyn0(%arg0: tensor<?x4xf64, #SparseMatrix>) -> tensor<?x
|
|||
// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]], %[[Iv1]]] : memref<2x?xf64>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<2x?xf64>
|
||||
// CHECK: %[[T:.*]] = bufferization.to_tensor %[[M]] : memref<2x?xf64>
|
||||
// CHECK: return %[[T]] : tensor<2x?xf64>
|
||||
func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tensor<2x?xf64> {
|
||||
%0 = sparse_tensor.convert %arg0 : tensor<2x?xf64, #SparseMatrix> to tensor<2x?xf64>
@ -262,7 +262,7 @@ func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tensor<2x
|
|||
// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]], %[[Iv1]]] : memref<?x?xf64>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<?x?xf64>
|
||||
// CHECK: %[[T:.*]] = bufferization.to_tensor %[[M]] : memref<?x?xf64>
|
||||
// CHECK: return %[[T]] : tensor<?x?xf64>
|
||||
func @sparse_convert_2d_dyn2(%arg0: tensor<?x?xf64, #SparseMatrix>) -> tensor<?x?xf64> {
|
||||
%0 = sparse_tensor.convert %arg0 : tensor<?x?xf64, #SparseMatrix> to tensor<?x?xf64>
@ -311,7 +311,7 @@ func @sparse_convert_2d_dyn2(%arg0: tensor<?x?xf64, #SparseMatrix>) -> tensor<?x
|
|||
// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]], %[[Iv1]], %[[Iv2]]] : memref<2x3x4xf64>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<2x3x4xf64>
|
||||
// CHECK: %[[T:.*]] = bufferization.to_tensor %[[M]] : memref<2x3x4xf64>
|
||||
// CHECK: return %[[T]] : tensor<2x3x4xf64>
|
||||
func @sparse_convert_3d(%arg0: tensor<2x3x4xf64, #SparseTensor>) -> tensor<2x3x4xf64> {
|
||||
%0 = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseTensor> to tensor<2x3x4xf64>
@ -41,7 +41,7 @@
|
|||
// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_8]], %[[VAL_9]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
@ -53,7 +53,7 @@
|
|||
// CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_10]], %[[VAL_11]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_16:.*]] = memref.tensor_load %[[VAL_9]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_16]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
@ -84,7 +84,7 @@ func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
|
|||
// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
|
||||
// CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index
@ -94,7 +94,7 @@ func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>,
|
|||
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_9]], %[[VAL_10]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_15]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @dense2(%arga: tensor<32x16xf32, #DenseMatrix>,
@ -124,7 +124,7 @@ func @dense2(%arga: tensor<32x16xf32, #DenseMatrix>,
|
|||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
|
||||
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
@ -168,7 +168,7 @@ func @dense3(%arga: tensor<32x16xf32>,
|
|||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}}>> to memref<?xf32>
|
||||
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
@ -21,7 +21,7 @@
|
|||
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_7]], %[[VAL_8]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@ -29,7 +29,7 @@
|
|||
// CHECK: %[[VAL_11:.*]] = arith.addf %[[VAL_10]], %[[VAL_1]] : f32
|
||||
// CHECK: memref.store %[[VAL_11]], %[[VAL_8]]{{\[}}%[[VAL_9]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_12:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_12]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -58,7 +58,7 @@ func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> te
|
|||
// CHECK: %[[VAL_10:.*]] = arith.addf %[[VAL_9]], %[[VAL_1]] : f32
|
||||
// CHECK: memref.store %[[VAL_10]], %[[VAL_7]]{{\[}}%[[VAL_8]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_11:.*]] = memref.tensor_load %[[VAL_7]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_tensor %[[VAL_7]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_11]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @add_d_init(%arga: tensor<32xf32, #DV>, %argb: f32) -> tensor<32xf32> {
@ -81,7 +81,7 @@ func @add_d_init(%arga: tensor<32xf32, #DV>, %argb: f32) -> tensor<32xf32> {
|
|||
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_7]], %[[VAL_8]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@ -89,7 +89,7 @@ func @add_d_init(%arga: tensor<32xf32, #DV>, %argb: f32) -> tensor<32xf32> {
|
|||
// CHECK: %[[VAL_11:.*]] = arith.mulf %[[VAL_10]], %[[VAL_1]] : f32
|
||||
// CHECK: memref.store %[[VAL_11]], %[[VAL_8]]{{\[}}%[[VAL_9]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_12:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_12]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -114,7 +114,7 @@ func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> te
|
|||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@ -145,7 +145,7 @@ func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> te
|
|||
// CHECK: scf.for %[[VAL_28:.*]] = %[[VAL_29:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
|
||||
// CHECK: memref.store %[[VAL_1]], %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_30:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_30]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -167,7 +167,7 @@ func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> te
|
|||
// CHECK: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_2]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_7]], %[[VAL_8]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@ -183,7 +183,7 @@ func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> te
|
|||
// CHECK: %[[VAL_19:.*]] = arith.addf %[[VAL_15]], %[[VAL_18]] : f32
|
||||
// CHECK: memref.store %[[VAL_19]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_20:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_20:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_20]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -208,7 +208,7 @@ func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tenso
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_8]], %[[VAL_9]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@ -219,7 +219,7 @@ func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tenso
|
|||
// CHECK: %[[VAL_15:.*]] = arith.mulf %[[VAL_14]], %[[VAL_1]] : f32
|
||||
// CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_16:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_16]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -251,8 +251,8 @@ func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> te
|
|||
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_8]], %[[VAL_9]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@ -261,7 +261,7 @@ func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> te
|
|||
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_11]], %[[VAL_12]] : f32
|
||||
// CHECK: memref.store %[[VAL_13]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_14:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_14]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -283,8 +283,8 @@ func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_8]], %[[VAL_9]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@ -293,7 +293,7 @@ func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_13:.*]] = arith.mulf %[[VAL_11]], %[[VAL_12]] : f32
|
||||
// CHECK: memref.store %[[VAL_13]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_14:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_14]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -315,11 +315,11 @@ func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32
|
|||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant true
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@ -353,7 +353,7 @@ func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_31]]] : memref<32xf32>
|
||||
// CHECK: memref.store %[[VAL_33]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_34:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_34]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -373,11 +373,11 @@ func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32
|
|||
// CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
|
||||
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_5:.*]] = memref.buffer_cast %[[VAL_0]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_9]], %[[VAL_10]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@ -389,7 +389,7 @@ func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_17:.*]] = arith.mulf %[[VAL_15]], %[[VAL_16]] : f32
|
||||
// CHECK: memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_10]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_18]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -414,8 +414,8 @@ func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@ -449,7 +449,7 @@ func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_31]]] : memref<32xf32>
|
||||
// CHECK: memref.store %[[VAL_33]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_34:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_34]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -472,8 +472,8 @@ func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_9]], %[[VAL_10]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@ -485,7 +485,7 @@ func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_17:.*]] = arith.mulf %[[VAL_15]], %[[VAL_16]] : f32
|
||||
// CHECK: memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_10]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_18]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -511,7 +511,7 @@ func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@ -569,7 +569,7 @@ func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32
|
|||
// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_49]]] : memref<?xf32>
|
||||
// CHECK: memref.store %[[VAL_52]], %[[VAL_12]]{{\[}}%[[VAL_51]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_53:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_53:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_53]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@ -595,7 +595,7 @@ func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tens
|
|||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@ -631,7 +631,7 @@ func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tens
|
|||
// CHECK: %[[VAL_40:.*]] = select %[[VAL_38]], %[[VAL_39]], %[[VAL_24]] : index
|
||||
// CHECK: scf.yield %[[VAL_37]], %[[VAL_40]] : index, index
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_41:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_41:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_41]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
@@ -658,7 +658,7 @@ func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tens
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<16xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<16xf32> to memref<16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -722,7 +722,7 @@ func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tens
// CHECK: %[[VAL_59:.*]] = arith.mulf %[[VAL_58]], %[[VAL_2]] : f32
|
||||
// CHECK: memref.store %[[VAL_59]], %[[VAL_13]]{{\[}}%[[VAL_57]]] : memref<16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_60:.*]] = memref.tensor_load %[[VAL_13]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_60:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<16xf32>
|
||||
// CHECK: return %[[VAL_60]] : tensor<16xf32>
|
||||
// CHECK: }
|
||||
func @two_way_inv(%arga: tensor<16xf32, #SV>, %argb: tensor<16xf32, #SV>, %argc: f32, %argx: tensor<16xf32>) -> tensor<16xf32> {
@@ -752,7 +752,7 @@ func @two_way_inv(%arga: tensor<16xf32, #SV>, %argb: tensor<16xf32, #SV>, %argc:
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<16xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<16xf32> to memref<16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -815,7 +815,7 @@ func @two_way_inv(%arga: tensor<16xf32, #SV>, %argb: tensor<16xf32, #SV>, %argc:
// CHECK: %[[VAL_58:.*]] = arith.mulf %[[VAL_57]], %[[VAL_2]] : f32
|
||||
// CHECK: memref.store %[[VAL_58]], %[[VAL_13]]{{\[}}%[[VAL_56]]] : memref<16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_59:.*]] = memref.tensor_load %[[VAL_13]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_59:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<16xf32>
|
||||
// CHECK: return %[[VAL_59]] : tensor<16xf32>
|
||||
// CHECK: }
|
||||
func @two_way_inv_alt(%arga: tensor<16xf32, #SV>,
@@ -848,7 +848,7 @@ func @two_way_inv_alt(%arga: tensor<16xf32, #SV>,
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.alloc() : memref<f32>
|
||||
// CHECK: memref.copy %[[VAL_6]], %[[VAL_7]] : memref<f32> to memref<f32>
|
||||
// CHECK-DAG: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
@@ -860,7 +860,7 @@ func @two_way_inv_alt(%arga: tensor<16xf32, #SV>,
// CHECK: scf.yield %[[VAL_15]] : f32
|
||||
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_11]], %[[VAL_7]][] : memref<f32>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.tensor_load %[[VAL_7]] : memref<f32>
|
||||
// CHECK: %[[VAL_17:.*]] = bufferization.to_tensor %[[VAL_7]] : memref<f32>
|
||||
// CHECK: return %[[VAL_17]] : tensor<f32>
|
||||
// CHECK: }
|
||||
func @sum_reduction(%arga: tensor<?xf32, #SV>, %argx: tensor<f32>) -> tensor<f32> {
@@ -896,7 +896,7 @@ func @sum_reduction(%arga: tensor<?xf32, #SV>, %argx: tensor<f32>) -> tensor<f32
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<f32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<f32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<f32>
|
||||
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<f32> to memref<f32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_12]][] : memref<f32>
@@ -962,7 +962,7 @@ func @sum_reduction(%arga: tensor<?xf32, #SV>, %argx: tensor<f32>) -> tensor<f32
// CHECK: scf.yield %[[VAL_69]] : f32
|
||||
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_70:.*]], %[[VAL_12]][] : memref<f32>
|
||||
// CHECK: %[[VAL_71:.*]] = memref.tensor_load %[[VAL_12]] : memref<f32>
|
||||
// CHECK: %[[VAL_71:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<f32>
|
||||
// CHECK: return %[[VAL_71]] : tensor<f32>
|
||||
// CHECK: }
|
||||
func @sum_reduction_ss(%arga: tensor<16xf32, #SV>,
@@ -1002,11 +1002,11 @@ func @sum_reduction_ss(%arga: tensor<16xf32, #SV>,
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_3]] : memref<f32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<f32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<f32>
|
||||
// CHECK: memref.copy %[[VAL_13]], %[[VAL_14]] : memref<f32> to memref<f32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_14]][] : memref<f32>
@@ -1076,7 +1076,7 @@ func @sum_reduction_ss(%arga: tensor<16xf32, #SV>,
// CHECK: scf.yield %[[VAL_75]] : f32
|
||||
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_76:.*]], %[[VAL_14]][] : memref<f32>
|
||||
// CHECK: %[[VAL_77:.*]] = memref.tensor_load %[[VAL_14]] : memref<f32>
|
||||
// CHECK: %[[VAL_77:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<f32>
|
||||
// CHECK: return %[[VAL_77]] : tensor<f32>
|
||||
// CHECK: }
|
||||
func @sum_reduction_inv(%arga: tensor<16xf32, #SV>,
@@ -1118,16 +1118,16 @@ func @sum_reduction_inv(%arga: tensor<16xf32, #SV>,
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant true
|
||||
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_0]] : memref<?xf64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?xf64>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<?xf64>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?xf64>
|
||||
// CHECK: %[[VAL_13:.*]] = sparse_tensor.pointers %[[VAL_3]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]] = sparse_tensor.indices %[[VAL_3]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_16:.*]] = tensor.dim %[[VAL_4]], %[[VAL_5]] : tensor<?xf64>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.buffer_cast %[[VAL_4]] : memref<?xf64>
|
||||
// CHECK: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_4]] : memref<?xf64>
|
||||
// CHECK: %[[VAL_18:.*]] = memref.alloc(%[[VAL_16]]) : memref<?xf64>
|
||||
// CHECK: memref.copy %[[VAL_17]], %[[VAL_18]] : memref<?xf64> to memref<?xf64>
|
||||
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -1257,7 +1257,7 @@ func @sum_reduction_inv(%arga: tensor<16xf32, #SV>,
// CHECK: %[[VAL_114:.*]] = arith.addf %[[VAL_112]], %[[VAL_113]] : f64
|
||||
// CHECK: memref.store %[[VAL_114]], %[[VAL_18]]{{\[}}%[[VAL_110]]] : memref<?xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_115:.*]] = memref.tensor_load %[[VAL_18]] : memref<?xf64>
|
||||
// CHECK: %[[VAL_115:.*]] = bufferization.to_tensor %[[VAL_18]] : memref<?xf64>
|
||||
// CHECK: return %[[VAL_115]] : tensor<?xf64>
|
||||
// CHECK: }
|
||||
func @four_tensors_op(%arga: tensor<?xf64>,
@@ -1304,7 +1304,7 @@ func @four_tensors_op(%arga: tensor<?xf64>,
// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_4]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_4]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_3]] : memref<f64>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_3]] : memref<f64>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<f64>
|
||||
// CHECK: memref.copy %[[VAL_15]], %[[VAL_16]] : memref<f64> to memref<f64>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_16]][] : memref<f64>
@@ -1574,7 +1574,7 @@ func @four_tensors_op(%arga: tensor<?xf64>,
// CHECK: scf.yield %[[VAL_250]] : f64
|
||||
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_251:.*]], %[[VAL_16]][] : memref<f64>
|
||||
// CHECK: %[[VAL_252:.*]] = memref.tensor_load %[[VAL_16]] : memref<f64>
|
||||
// CHECK: %[[VAL_252:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<f64>
|
||||
// CHECK: return %[[VAL_252]] : tensor<f64>
|
||||
// CHECK: }
|
||||
func @red3s(%arga: tensor<?xf64, #SV>,
@@ -25,8 +25,8 @@
// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_9]], %[[VAL_10]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
@@ -39,7 +39,7 @@
// CHECK: memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_10]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_18]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -62,8 +62,8 @@ func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_9]], %[[VAL_10]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
@@ -76,7 +76,7 @@ func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_10]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_18]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -102,8 +102,8 @@ func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_7]] {
@@ -140,7 +140,7 @@ func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: memref.store %[[VAL_36]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_34]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_37:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_37:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_37]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -164,8 +164,8 @@ func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@@ -180,7 +180,7 @@ func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: memref.store %[[VAL_20]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_17]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_21:.*]] = memref.tensor_load %[[VAL_11]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_21:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_21]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -206,8 +206,8 @@ func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_6]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_6]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
@@ -249,7 +249,7 @@ func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: memref.store %[[VAL_39]], %[[VAL_13]]{{\[}}%[[VAL_36]], %[[VAL_38]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_40:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_40:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_40]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -273,8 +273,8 @@ func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -290,7 +290,7 @@ func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: memref.store %[[VAL_21]], %[[VAL_11]]{{\[}}%[[VAL_15]], %[[VAL_16]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_22:.*]] = memref.tensor_load %[[VAL_11]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_22:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_22]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -318,8 +318,8 @@ func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: %[[VAL_10:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_14]], %[[VAL_15]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
@@ -385,7 +385,7 @@ func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: memref.store %[[VAL_57]], %[[VAL_15]]{{\[}}%[[VAL_54]], %[[VAL_56]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_58:.*]] = memref.tensor_load %[[VAL_15]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_58:.*]] = bufferization.to_tensor %[[VAL_15]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_58]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -410,8 +410,8 @@ func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -429,7 +429,7 @@ func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: memref.store %[[VAL_24]], %[[VAL_12]]{{\[}}%[[VAL_16]], %[[VAL_21]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_25:.*]] = memref.tensor_load %[[VAL_12]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_25:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_25]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -459,7 +459,7 @@ func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_15]], %[[VAL_16]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -594,7 +594,7 @@ func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: te
// CHECK: memref.store %[[VAL_115]], %[[VAL_16]]{{\[}}%[[VAL_109]], %[[VAL_114]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_116:.*]] = memref.tensor_load %[[VAL_16]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_116:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_116]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -624,7 +624,7 @@ func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>,
// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_15]], %[[VAL_16]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
@@ -691,7 +691,7 @@ func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>,
// CHECK: %[[VAL_71:.*]] = select %[[VAL_69]], %[[VAL_70]], %[[VAL_28]] : index
|
||||
// CHECK: scf.yield %[[VAL_68]], %[[VAL_71]] : index, index
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_72:.*]] = memref.tensor_load %[[VAL_16]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_72:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_72]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -720,7 +720,7 @@ func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>,
// CHECK: %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_7]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_7]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_14]], %[[VAL_15]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -798,7 +798,7 @@ func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>,
// CHECK: memref.store %[[VAL_69]], %[[VAL_15]]{{\[}}%[[VAL_62]], %[[VAL_68]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_70:.*]] = memref.tensor_load %[[VAL_15]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_70:.*]] = bufferization.to_tensor %[[VAL_15]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_70]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -825,7 +825,7 @@ func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>,
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_5]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_5]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<32x16xf32> to memref<32x16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -845,7 +845,7 @@ func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>,
// CHECK: memref.store %[[VAL_27]], %[[VAL_13]]{{\[}}%[[VAL_17]], %[[VAL_22]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_28:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_28:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_28]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
@@ -879,8 +879,8 @@ func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>,
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<16xf32>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<16xf32> to memref<16xf32>
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
@@ -898,7 +898,7 @@ func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>,
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_11]] : memref<16xf32>
|
||||
// CHECK: %[[VAL_26:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<16xf32>
|
||||
// CHECK: return %[[VAL_26]] : tensor<16xf32>
|
||||
// CHECK: }
|
||||
func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> {
@@ -930,7 +930,7 @@ func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: tenso
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<10x20xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.alloc() : memref<f32>
|
||||
// CHECK: memref.copy %[[VAL_7]], %[[VAL_8]] : memref<f32> to memref<f32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref<f32>
@@ -946,7 +946,7 @@ func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: tenso
// CHECK: scf.yield %[[VAL_16]] : f32
|
||||
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_10]], %[[VAL_8]][] : memref<f32>
|
||||
// CHECK: %[[VAL_23:.*]] = memref.tensor_load %[[VAL_8]] : memref<f32>
|
||||
// CHECK: %[[VAL_23:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<f32>
|
||||
// CHECK: return %[[VAL_23]] : tensor<f32>
|
||||
// CHECK: }
|
||||
func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor<f32>) -> tensor<f32> {
@@ -980,7 +980,7 @@ func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor<f32>) -> tenso
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?xf64>
|
||||
// CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?xf64>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<?x?xf64>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?xf64>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc(%[[VAL_8]], %[[VAL_9]]) : memref<?x?xf64>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<?x?xf64> to memref<?x?xf64>
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] {
@@ -994,7 +994,7 @@ func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor<f32>) -> tenso
// CHECK: memref.store %[[VAL_19]], %[[VAL_11]]{{\[}}%[[VAL_12]], %[[VAL_17]]] : memref<?x?xf64>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_20:.*]] = memref.tensor_load %[[VAL_11]] : memref<?x?xf64>
|
||||
// CHECK: %[[VAL_20:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<?x?xf64>
|
||||
// CHECK: return %[[VAL_20]] : tensor<?x?xf64>
|
||||
// CHECK: }
|
||||
func @scale(%arga: tensor<?x?xf64, #Tds>, %argx: tensor<?x?xf64>) -> tensor<?x?xf64> {
@@ -1032,12 +1032,12 @@ func @scale(%arga: tensor<?x?xf64, #Tds>, %argx: tensor<?x?xf64>) -> tensor<?x?x
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = tensor.dim %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = tensor.dim %[[VAL_3]], %[[VAL_4]] : tensor<?x?xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = tensor.dim %[[VAL_3]], %[[VAL_5]] : tensor<?x?xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.buffer_cast %[[VAL_3]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_3]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_14]], %[[VAL_15]]) : memref<?x?xf32>
|
||||
// CHECK: memref.copy %[[VAL_16]], %[[VAL_17]] : memref<?x?xf32> to memref<?x?xf32>
|
||||
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
@@ -1062,7 +1062,7 @@ func @scale(%arga: tensor<?x?xf64, #Tds>, %argx: tensor<?x?xf64>) -> tensor<?x?x
// CHECK: memref.store %[[VAL_29]], %[[VAL_17]]{{\[}}%[[VAL_21]], %[[VAL_26]]] : memref<?x?xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_38:.*]] = memref.tensor_load %[[VAL_17]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_38:.*]] = bufferization.to_tensor %[[VAL_17]] : memref<?x?xf32>
|
||||
// CHECK: return %[[VAL_38]] : tensor<?x?xf32>
|
||||
// CHECK: }
|
||||
func @sampled_dense_dense(%args: tensor<?x?xf32, #Tss>,
@@ -1115,10 +1115,10 @@ func @sampled_dense_dense(%args: tensor<?x?xf32, #Tss>,
// CHECK: %[[VAL_17:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_7]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_18:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_7]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_20:.*]] = memref.buffer_cast %[[VAL_3]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_21:.*]] = memref.buffer_cast %[[VAL_4]] : memref<f32>
|
||||
// CHECK: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_3]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_21:.*]] = bufferization.to_memref %[[VAL_4]] : memref<f32>
|
||||
// CHECK: %[[VAL_22:.*]] = tensor.dim %[[VAL_5]], %[[VAL_6]] : tensor<?xf32>
|
||||
// CHECK: %[[VAL_23:.*]] = memref.buffer_cast %[[VAL_5]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_23:.*]] = bufferization.to_memref %[[VAL_5]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_24:.*]] = memref.alloc(%[[VAL_22]]) : memref<?xf32>
|
||||
// CHECK: memref.copy %[[VAL_23]], %[[VAL_24]] : memref<?xf32> to memref<?xf32>
|
||||
// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_21]][] : memref<f32>
@@ -1282,7 +1282,7 @@ func @sampled_dense_dense(%args: tensor<?x?xf32, #Tss>,
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_173:.*]], %[[VAL_24]]{{\[}}%[[VAL_162]]] : memref<?xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_174:.*]] = memref.tensor_load %[[VAL_24]] : memref<?xf32>
|
||||
// CHECK: %[[VAL_174:.*]] = bufferization.to_tensor %[[VAL_24]] : memref<?xf32>
|
||||
// CHECK: return %[[VAL_174]] : tensor<?xf32>
|
||||
// CHECK: }
|
||||
func @sum_kernel_with_inv(%arga: tensor<?x?xf32, #Tss>,
@@ -32,8 +32,8 @@
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
@@ -50,7 +50,7 @@
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_22:.*]] = memref.tensor_load %[[VAL_11]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_22:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_22]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -74,8 +74,8 @@ func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
@@ -92,7 +92,7 @@ func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_22:.*]] = memref.tensor_load %[[VAL_11]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_22:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_22]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -120,8 +120,8 @@ func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_10:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_14]], %[[VAL_15]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_9]] {
@@ -162,7 +162,7 @@ func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_42:.*]] = memref.tensor_load %[[VAL_15]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_42:.*]] = bufferization.to_tensor %[[VAL_15]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_42]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -188,8 +188,8 @@ func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
@@ -208,7 +208,7 @@ func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_26:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_26]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -235,8 +235,8 @@ func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_8]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_8]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_13]], %[[VAL_14]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] {
@@ -281,7 +281,7 @@ func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_43:.*]] = memref.tensor_load %[[VAL_14]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_43:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_43]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -306,8 +306,8 @@ func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_6]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_6]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
@@ -326,7 +326,7 @@ func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_25:.*]] = memref.tensor_load %[[VAL_12]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_25:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_25]] : tensor<32x16x8xf32>
// CHECK: }
func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -356,8 +356,8 @@ func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_16:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_17:.*]] = memref.alloc() : memref<32x16x8xf32>
// CHECK: memref.copy %[[VAL_16]], %[[VAL_17]] : memref<32x16x8xf32> to memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_9]] {
@@ -426,7 +426,7 @@ func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_62:.*]] = memref.tensor_load %[[VAL_17]] : memref<32x16x8xf32>
// CHECK: %[[VAL_62:.*]] = bufferization.to_tensor %[[VAL_17]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_62]] : tensor<32x16x8xf32>
// CHECK: }
func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -453,8 +453,8 @@ func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<32x16x8xf32>
// CHECK: memref.copy %[[VAL_13]], %[[VAL_14]] : memref<32x16x8xf32> to memref<32x16x8xf32>
// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
@@ -475,7 +475,7 @@ func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_29:.*]] = memref.tensor_load %[[VAL_14]] : memref<32x16x8xf32>
// CHECK: %[[VAL_29:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_29]] : tensor<32x16x8xf32>
// CHECK: }
func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -502,8 +502,8 @@ func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_7]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_7]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<32x16x8xf32>
// CHECK: memref.copy %[[VAL_13]], %[[VAL_14]] : memref<32x16x8xf32> to memref<32x16x8xf32>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
@@ -553,7 +553,7 @@ func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_46:.*]] = memref.tensor_load %[[VAL_14]] : memref<32x16x8xf32>
// CHECK: %[[VAL_46:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_46]] : tensor<32x16x8xf32>
// CHECK: }
func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -578,8 +578,8 @@ func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<32x16x8xf32>
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<32x16x8xf32> to memref<32x16x8xf32>
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xindex>
@@ -599,7 +599,7 @@ func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_12]] : memref<32x16x8xf32>
// CHECK: %[[VAL_26:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32x16x8xf32>
// CHECK: return %[[VAL_26]] : tensor<32x16x8xf32>
// CHECK: }
func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
@@ -629,8 +629,8 @@ func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_16:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
// CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
// CHECK: %[[VAL_17:.*]] = memref.alloc() : memref<32x16x8xf32>
// CHECK: memref.copy %[[VAL_16]], %[[VAL_17]] : memref<32x16x8xf32> to memref<32x16x8xf32>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref<?xindex>
@@ -704,7 +704,7 @@ func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %ar
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_65:.*]] = memref.tensor_load %[[VAL_17]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_65:.*]] = bufferization.to_tensor %[[VAL_17]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_65]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
|
||||
|
@ -731,8 +731,8 @@ func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_13]], %[[VAL_14]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xindex>
|
||||
|
@ -754,7 +754,7 @@ func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_14]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_30:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_30]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
|
||||
|
@ -783,8 +783,8 @@ func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_8]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_8]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_15]], %[[VAL_16]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
|
||||
|
@ -862,7 +862,7 @@ func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_66:.*]] = memref.tensor_load %[[VAL_16]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_66:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_66]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
|
||||
|
@ -888,8 +888,8 @@ func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
|
@ -911,7 +911,7 @@ func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_29:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_29:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_29]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
|
||||
|
@ -943,8 +943,8 @@ func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: %[[VAL_14:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_15:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_18:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_19:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_18]], %[[VAL_19]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref<?xindex>
|
||||
|
@ -1046,7 +1046,7 @@ func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_85:.*]] = memref.tensor_load %[[VAL_19]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_85:.*]] = bufferization.to_tensor %[[VAL_19]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_85]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
|
||||
|
@ -1074,8 +1074,8 @@ func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.alloc() : memref<32x16x8xf32>
|
||||
// CHECK: memref.copy %[[VAL_14]], %[[VAL_15]] : memref<32x16x8xf32> to memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
|
@ -1099,7 +1099,7 @@ func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_15]] : memref<32x16x8xf32>
|
||||
// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_15]] : memref<32x16x8xf32>
|
||||
// CHECK: return %[[VAL_33]] : tensor<32x16x8xf32>
|
||||
// CHECK: }
|
||||
func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
|
||||
|
@ -1136,11 +1136,11 @@ func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = tensor.dim %[[VAL_2]], %[[VAL_5]] : tensor<?x?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor<?x?xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_0]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]], %[[VAL_14]]) : memref<?x?xf32>
|
||||
// CHECK: memref.copy %[[VAL_15]], %[[VAL_16]] : memref<?x?xf32> to memref<?x?xf32>
|
||||
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_6]] {
|
||||
|
@ -1165,7 +1165,7 @@ func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %ar
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_16]] : memref<?x?xf32>
|
||||
// CHECK: %[[VAL_34:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<?x?xf32>
|
||||
// CHECK: return %[[VAL_34]] : tensor<?x?xf32>
|
||||
// CHECK: }
|
||||
func @kernel_3d(%arga: tensor<?x?xf32>,
|
||||
|
@ -1203,7 +1203,7 @@ func @kernel_3d(%arga: tensor<?x?xf32>,
|
|||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.alloc() : memref<f32>
|
||||
// CHECK: memref.copy %[[VAL_9]], %[[VAL_10]] : memref<f32> to memref<f32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref<f32>
|
||||
|
@ -1227,7 +1227,7 @@ func @kernel_3d(%arga: tensor<?x?xf32>,
|
|||
// CHECK: scf.yield %[[VAL_20]] : f32
|
||||
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_14]], %[[VAL_10]][] : memref<f32>
|
||||
// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_10]] : memref<f32>
|
||||
// CHECK: %[[VAL_34:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<f32>
|
||||
// CHECK: return %[[VAL_34]] : tensor<f32>
|
||||
// CHECK: }
|
||||
func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor<f32>) -> tensor<f32> {
|
||||
|
@ -1260,10 +1260,10 @@ func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor<f32>) -> t
|
|||
// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor<?x?x?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_0]] : memref<?x?x?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?x?x?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_5]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<f32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<f32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.alloc() : memref<f32>
|
||||
// CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<f32> to memref<f32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_12]][] : memref<f32>
|
||||
|
@ -1281,7 +1281,7 @@ func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor<f32>) -> t
|
|||
// CHECK: scf.yield %[[VAL_18]] : f32
|
||||
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_14]], %[[VAL_12]][] : memref<f32>
|
||||
// CHECK: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_12]] : memref<f32>
|
||||
// CHECK: %[[VAL_30:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<f32>
|
||||
// CHECK: return %[[VAL_30]] : tensor<f32>
|
||||
// CHECK: }
|
||||
func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
|
||||
|
@ -1320,9 +1320,9 @@ func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
|
|||
// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<20xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<30xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_3]] : memref<10x20x30xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<30xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : memref<10x20x30xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<10x20x30xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<10x20x30xf32> to memref<10x20x30xf32>
|
||||
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] {
|
||||
|
@ -1337,7 +1337,7 @@ func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_22:.*]] = memref.tensor_load %[[VAL_13]] : memref<10x20x30xf32>
|
||||
// CHECK: %[[VAL_22:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<10x20x30xf32>
|
||||
// CHECK: return %[[VAL_22]] : tensor<10x20x30xf32>
|
||||
// CHECK: }
|
||||
func @invariants(%arga: tensor<10xf32, #Td>,
|
||||
|
|
|
@ -24,8 +24,8 @@
|
|||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<4xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<4xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32xf32>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32xf32> to memref<32xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<4xf32>
|
||||
|
@ -39,7 +39,7 @@
|
|||
// CHECK: %[[VAL_20:.*]] = arith.addf %[[VAL_17]], %[[VAL_19]] : f32
|
||||
// CHECK: memref.store %[[VAL_20]], %[[VAL_11]]{{\[}}%[[VAL_16]]] : memref<32xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_21:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf32>
|
||||
// CHECK: %[[VAL_21:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xf32>
|
||||
// CHECK: return %[[VAL_21]] : tensor<32xf32>
|
||||
// CHECK: }
|
||||
func @mul_inv_dense1d(%arga: tensor<32xf32, #SpVec>,
|
||||
|
@ -76,8 +76,8 @@ func @mul_inv_dense1d(%arga: tensor<32xf32, #SpVec>,
|
|||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<34xi32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34xi32>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi32>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<32xi32>
|
||||
// CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32xi32> to memref<32xi32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
|
@ -90,7 +90,7 @@ func @mul_inv_dense1d(%arga: tensor<32xf32, #SpVec>,
|
|||
// CHECK: %[[VAL_19:.*]] = arith.andi %[[VAL_16]], %[[VAL_18]] : i32
|
||||
// CHECK: memref.store %[[VAL_19]], %[[VAL_11]]{{\[}}%[[VAL_15]]] : memref<32xi32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_20:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xi32>
|
||||
// CHECK: %[[VAL_20:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xi32>
|
||||
// CHECK: return %[[VAL_20]] : tensor<32xi32>
|
||||
// CHECK: }
|
||||
func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>,
|
||||
|
@ -128,8 +128,8 @@ func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>,
|
|||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<34x19xf64>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32x16xf64>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34x19xf64>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<32x16xf64>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<32x16xf64> to memref<32x16xf64>
|
||||
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_3]] {
|
||||
|
@ -148,7 +148,7 @@ func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>,
|
|||
// CHECK: memref.store %[[VAL_26]], %[[VAL_13]]{{\[}}%[[VAL_14]], %[[VAL_19]]] : memref<32x16xf64>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_27:.*]] = memref.tensor_load %[[VAL_13]] : memref<32x16xf64>
|
||||
// CHECK: %[[VAL_27:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<32x16xf64>
|
||||
// CHECK: return %[[VAL_27]] : tensor<32x16xf64>
|
||||
// CHECK: }
|
||||
func @mul_affine_dense2d(%arga: tensor<32x16xf64, #CSR>,
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
// CHECK: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] {
|
||||
|
@ -48,7 +48,7 @@
|
|||
// CHECK: %[[VAL_13:.*]] = math.abs %[[VAL_12]] : f64
|
||||
// CHECK: memref.store %[[VAL_13]], %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<32xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_14:.*]] = memref.tensor_load %[[VAL_7]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_tensor %[[VAL_7]] : memref<32xf64>
|
||||
// CHECK: return %[[VAL_14]] : tensor<32xf64>
|
||||
func @abs(%arga: tensor<32xf64, #SV>,
|
||||
%argx: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> {
|
||||
|
@ -70,7 +70,7 @@ func @abs(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] {
|
||||
|
@ -79,7 +79,7 @@ func @abs(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_13:.*]] = math.ceil %[[VAL_12]] : f64
|
||||
// CHECK: memref.store %[[VAL_13]], %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<32xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_14:.*]] = memref.tensor_load %[[VAL_7]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_tensor %[[VAL_7]] : memref<32xf64>
|
||||
// CHECK: return %[[VAL_14]] : tensor<32xf64>
|
||||
// CHECK: }
|
||||
func @ceil(%arga: tensor<32xf64, #SV>,
|
||||
|
@ -102,7 +102,7 @@ func @ceil(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] {
|
||||
|
@ -111,7 +111,7 @@ func @ceil(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_13:.*]] = math.floor %[[VAL_12]] : f64
|
||||
// CHECK: memref.store %[[VAL_13]], %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<32xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_14:.*]] = memref.tensor_load %[[VAL_7]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_tensor %[[VAL_7]] : memref<32xf64>
|
||||
// CHECK: return %[[VAL_14]] : tensor<32xf64>
|
||||
// CHECK: }
|
||||
func @floor(%arga: tensor<32xf64, #SV>,
|
||||
|
@ -134,7 +134,7 @@ func @floor(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_2]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] {
|
||||
|
@ -143,7 +143,7 @@ func @floor(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_13:.*]] = arith.negf %[[VAL_12]] : f64
|
||||
// CHECK: memref.store %[[VAL_13]], %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<32xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_14:.*]] = memref.tensor_load %[[VAL_7]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_tensor %[[VAL_7]] : memref<32xf64>
|
||||
// CHECK: return %[[VAL_14]] : tensor<32xf64>
|
||||
// CHECK: }
|
||||
func @neg(%arga: tensor<32xf64, #SV>,
|
||||
|
@ -169,8 +169,8 @@ func @neg(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
|
||||
|
@ -202,7 +202,7 @@ func @neg(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref<32xf64>
|
||||
// CHECK: memref.store %[[VAL_32]], %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<32xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK: return %[[VAL_33]] : tensor<32xf64>
|
||||
// CHECK: }
|
||||
func @add(%arga: tensor<32xf64, #SV>,
|
||||
|
@ -229,8 +229,8 @@ func @add(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
|
||||
|
@ -264,7 +264,7 @@ func @add(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_34:.*]] = arith.negf %[[VAL_33]] : f64
|
||||
// CHECK: memref.store %[[VAL_34]], %[[VAL_11]]{{\[}}%[[VAL_31]]] : memref<32xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_35:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_35:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK: return %[[VAL_35]] : tensor<32xf64>
|
||||
// CHECK: }
|
||||
func @sub(%arga: tensor<32xf64, #SV>,
|
||||
|
@ -289,8 +289,8 @@ func @sub(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
|
||||
|
@ -300,7 +300,7 @@ func @sub(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_16:.*]] = arith.mulf %[[VAL_14]], %[[VAL_15]] : f64
|
||||
// CHECK: memref.store %[[VAL_16]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_17:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_17:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xf64>
|
||||
// CHECK: return %[[VAL_17]] : tensor<32xf64>
|
||||
// CHECK: }
|
||||
func @mul(%arga: tensor<32xf64, #SV>,
|
||||
|
@ -325,7 +325,7 @@ func @mul(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
|
||||
|
@ -334,7 +334,7 @@ func @mul(%arga: tensor<32xf64, #SV>,
|
|||
// CHECK: %[[VAL_14:.*]] = arith.divf %[[VAL_13]], %[[VAL_2]] : f64
|
||||
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xf64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xf64>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xf64>
|
||||
// CHECK: return %[[VAL_15]] : tensor<32xf64>
|
||||
// CHECK: }
|
||||
func @divbyc(%arga: tensor<32xf64, #SV>,
|
||||
|
|
|
@ -33,8 +33,8 @@
|
|||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
|
||||
|
@ -66,7 +66,7 @@
|
|||
// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref<32xi64>
|
||||
// CHECK: memref.store %[[VAL_32]], %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_33]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @add(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -94,8 +94,8 @@ func @add(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
|
||||
|
@ -129,7 +129,7 @@ func @add(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_35:.*]] = arith.subi %[[VAL_7]], %[[VAL_34]] : i64
|
||||
// CHECK: memref.store %[[VAL_35]], %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_36:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_36:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_36]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @sub(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -154,8 +154,8 @@ func @sub(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
|
||||
|
@ -165,7 +165,7 @@ func @sub(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_16:.*]] = arith.muli %[[VAL_14]], %[[VAL_15]] : i64
|
||||
// CHECK: memref.store %[[VAL_16]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_17:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_17:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_17]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @mul(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -190,7 +190,7 @@ func @mul(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
|
||||
|
@ -199,7 +199,7 @@ func @mul(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_14:.*]] = arith.divsi %[[VAL_13]], %[[VAL_2]] : i64
|
||||
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_15]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @divsbyc(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -224,7 +224,7 @@ func @divsbyc(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
|
||||
|
@ -233,7 +233,7 @@ func @divsbyc(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_14:.*]] = arith.divui %[[VAL_13]], %[[VAL_2]] : i64
|
||||
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_15]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @divubyc(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -258,8 +258,8 @@ func @divubyc(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
|
||||
|
@ -269,7 +269,7 @@ func @divubyc(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i64
|
||||
// CHECK: memref.store %[[VAL_16]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_17:.*]] = memref.tensor_load %[[VAL_9]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_17:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_17]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @and(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -296,8 +296,8 @@ func @and(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
|
||||
|
@ -329,7 +329,7 @@ func @and(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref<32xi64>
|
||||
// CHECK: memref.store %[[VAL_32]], %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_33]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @or(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -356,8 +356,8 @@ func @or(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
|
||||
|
@ -389,7 +389,7 @@ func @or(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref<32xi64>
|
||||
// CHECK: memref.store %[[VAL_32]], %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_33]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @xor(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -414,7 +414,7 @@ func @xor(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
|
||||
|
@ -423,7 +423,7 @@ func @xor(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_14:.*]] = arith.shrsi %[[VAL_13]], %[[VAL_2]] : i64
|
||||
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_15]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @ashrbyc(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -448,7 +448,7 @@ func @ashrbyc(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
|
||||
|
@ -457,7 +457,7 @@ func @ashrbyc(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_14:.*]] = arith.shrui %[[VAL_13]], %[[VAL_2]] : i64
|
||||
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_15]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @lsrbyc(%arga: tensor<32xi64, #SV>,
|
||||
|
@ -482,7 +482,7 @@ func @lsrbyc(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
|
||||
|
@ -491,7 +491,7 @@ func @lsrbyc(%arga: tensor<32xi64, #SV>,
|
|||
// CHECK: %[[VAL_14:.*]] = arith.shli %[[VAL_13]], %[[VAL_2]] : i64
|
||||
// CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xi64>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_15:.*]] = memref.tensor_load %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xi64>
|
||||
// CHECK: return %[[VAL_15]] : tensor<32xi64>
|
||||
// CHECK: }
|
||||
func @lslbyc(%arga: tensor<32xi64, #SV>,
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<20x30xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<10x30xf32>
|
||||
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30xf32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x30xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<10x30xf32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<10x30xf32> to memref<10x30xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
|
@ -40,7 +40,7 @@
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_29:.*]] = memref.tensor_load %[[VAL_13]] : memref<10x30xf32>
|
||||
// CHECK: %[[VAL_29:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<10x30xf32>
|
||||
// CHECK: return %[[VAL_29]] : tensor<10x30xf32>
|
||||
// CHECK: }
|
||||
func @matmul(%a: tensor<10x20xf32, #DCSR>,
|
||||
|
@ -59,13 +59,13 @@ func @matmul(%a: tensor<10x20xf32, #DCSR>,
|
|||
// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 6 : index
|
||||
// CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_0]] : memref<8x8xi32>
|
||||
// CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<8x8xi32>
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<6x6xi32>
|
||||
// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<6x6xi32>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<6x6xi32>
|
||||
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<6x6xi32> to memref<6x6xi32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]]] : memref<?xindex>
|
||||
|
@ -91,7 +91,7 @@ func @matmul(%a: tensor<10x20xf32, #DCSR>,
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_32:.*]] = memref.tensor_load %[[VAL_13]] : memref<6x6xi32>
|
||||
// CHECK: %[[VAL_32:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<6x6xi32>
|
||||
// CHECK: return %[[VAL_32]] : tensor<6x6xi32>
|
||||
// CHECK: }
|
||||
func @conv2d(%input: tensor<8x8xi32>,
|
||||
|
@ -111,13 +111,13 @@ func @conv2d(%input: tensor<8x8xi32>,
|
|||
// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
|
||||
// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
|
||||
// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 5 : index
|
||||
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<5x3xi8>
|
||||
// CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<5x3xi8>
|
||||
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_10:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_5]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_5]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<5x6xi64>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<5x6xi64>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<5x6xi64>
|
||||
// CHECK: memref.copy %[[VAL_13]], %[[VAL_14]] : memref<5x6xi64> to memref<5x6xi64>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
|
||||
|
@ -142,7 +142,7 @@ func @conv2d(%input: tensor<8x8xi32>,
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_14]] : memref<5x6xi64>
|
||||
// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<5x6xi64>
|
||||
// CHECK: return %[[VAL_33]] : tensor<5x6xi64>
|
||||
// CHECK: }
|
||||
func @quantized_matmul(%input1: tensor<5x3xi8>,
|
||||
|
|
|
@ -30,8 +30,8 @@
|
|||
// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-HIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-HIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-HIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-HIR: %[[VAL_11:.*]] = memref.alloc() : memref<32xf64>
|
||||
// CHECK-HIR: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32xf64> to memref<32xf64>
|
||||
// CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
|
||||
|
@ -49,7 +49,7 @@
|
|||
// CHECK-HIR: }
|
||||
// CHECK-HIR: memref.store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<32xf64>
|
||||
// CHECK-HIR: }
|
||||
// CHECK-HIR: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK-HIR: %[[VAL_26:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK-HIR: return %[[VAL_26]] : tensor<32xf64>
|
||||
// CHECK-HIR: }
|
||||
|
||||
|
@ -63,8 +63,8 @@
|
|||
// CHECK-MIR: %[[VAL_6:.*]] = call @sparsePointers(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr<i8>, index) -> memref<?xindex>
|
||||
// CHECK-MIR: %[[VAL_7:.*]] = call @sparseIndices(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr<i8>, index) -> memref<?xindex>
|
||||
// CHECK-MIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr<i8>) -> memref<?xf64>
|
||||
// CHECK-MIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-MIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-MIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-MIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-MIR: %[[VAL_11:.*]] = memref.alloc() : memref<32xf64>
|
||||
// CHECK-MIR: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32xf64> to memref<32xf64>
|
||||
// CHECK-MIR: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
|
||||
|
@ -82,7 +82,7 @@
|
|||
// CHECK-MIR: }
|
||||
// CHECK-MIR: memref.store %[[VAL_19]], %[[VAL_11]]{{\[}}%[[VAL_14]]] : memref<32xf64>
|
||||
// CHECK-MIR: }
|
||||
// CHECK-MIR: %[[VAL_28:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK-MIR: %[[VAL_28:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK-MIR: return %[[VAL_28]] : tensor<32xf64>
|
||||
// CHECK-MIR: }
|
||||
|
||||
|
|
|
@ -33,8 +33,8 @@
|
|||
// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], dimOrdering = affine_map<(d0, d1) -> (d1, d0)>, pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], dimOrdering = affine_map<(d0, d1) -> (d1, d0)>, pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], dimOrdering = affine_map<(d0, d1) -> (d1, d0)>, pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
|
||||
// CHECK-HIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-HIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-HIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-HIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-HIR: %[[VAL_11:.*]] = memref.alloc() : memref<32xf64>
|
||||
// CHECK-HIR: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32xf64> to memref<32xf64>
|
||||
// CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
|
||||
|
@ -51,7 +51,7 @@
|
|||
// CHECK-HIR: memref.store %[[VAL_22]], %[[VAL_11]]{{\[}}%[[VAL_18]]] : memref<32xf64>
|
||||
// CHECK-HIR: }
|
||||
// CHECK-HIR: }
|
||||
// CHECK-HIR: %[[VAL_23:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK-HIR: %[[VAL_23:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xf64>
|
||||
// CHECK-HIR: return %[[VAL_23]] : tensor<32xf64>
|
||||
// CHECK-HIR: }
|
||||
|
||||
|
@ -65,8 +65,8 @@
|
|||
// CHECK-MIR: %[[VAL_7:.*]] = call @sparsePointers(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr<i8>, index) -> memref<?xindex>
|
||||
// CHECK-MIR: %[[VAL_8:.*]] = call @sparseIndices(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr<i8>, index) -> memref<?xindex>
|
||||
// CHECK-MIR: %[[VAL_9:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr<i8>) -> memref<?xf64>
|
||||
// CHECK-MIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-MIR: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-MIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-MIR: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-MIR: %[[VAL_12:.*]] = memref.alloc() : memref<32xf64>
|
||||
// CHECK-MIR: memref.copy %[[VAL_11]], %[[VAL_12]] : memref<32xf64> to memref<32xf64>
|
||||
// CHECK-MIR: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
|
||||
|
@ -83,7 +83,7 @@
|
|||
// CHECK-MIR: memref.store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_21]]] : memref<32xf64>
|
||||
// CHECK-MIR: }
|
||||
// CHECK-MIR: }
|
||||
// CHECK-MIR: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_12]] : memref<32xf64>
|
||||
// CHECK-MIR: %[[VAL_26:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf64>
|
||||
// CHECK-MIR: return %[[VAL_26]] : tensor<32xf64>
|
||||
// CHECK-MIR: }
|
||||
|
||||
|
|
|
@ -30,8 +30,8 @@
|
|||
// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-HIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-HIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-HIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-HIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
|
||||
// CHECK-HIR-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xindex>
|
||||
// CHECK-HIR-DAG: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_5]] : index
|
||||
|
@ -47,7 +47,7 @@
|
|||
// CHECK-HIR: }
|
||||
// CHECK-HIR: memref.store %[[VAL_16]], %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64>
|
||||
// CHECK-HIR: }
|
||||
// CHECK-HIR: %[[VAL_25:.*]] = memref.tensor_load %[[VAL_10]] : memref<32xf64>
|
||||
// CHECK-HIR: %[[VAL_25:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32xf64>
|
||||
// CHECK-HIR: return %[[VAL_25]] : tensor<32xf64>
|
||||
// CHECK-HIR: }
|
||||
|
||||
|
@ -61,8 +61,8 @@
|
|||
// CHECK-MIR: %[[VAL_6:.*]] = call @sparsePointers(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr<i8>, index) -> memref<?xindex>
|
||||
// CHECK-MIR: %[[VAL_7:.*]] = call @sparseIndices(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr<i8>, index) -> memref<?xindex>
|
||||
// CHECK-MIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr<i8>) -> memref<?xf64>
|
||||
// CHECK-MIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-MIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-MIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64>
|
||||
// CHECK-MIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64>
|
||||
// CHECK-MIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
|
||||
// CHECK-MIR-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xindex>
|
||||
// CHECK-MIR-DAG: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_5]] : index
|
||||
|
@ -78,7 +78,7 @@
|
|||
// CHECK-MIR: }
|
||||
// CHECK-MIR: memref.store %[[VAL_16]], %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64>
|
||||
// CHECK-MIR: }
|
||||
// CHECK-MIR: %[[VAL_25:.*]] = memref.tensor_load %[[VAL_10]] : memref<32xf64>
|
||||
// CHECK-MIR: %[[VAL_25:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32xf64>
|
||||
// CHECK-MIR: return %[[VAL_25]] : tensor<32xf64>
|
||||
// CHECK-MIR: }
|
||||
|
||||
|
|
|
@ -34,13 +34,13 @@
|
|||
// CHECK: %[[VAL_10:.*]] = arith.constant 80 : index
|
||||
// CHECK: %[[VAL_11:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_0]] : memref<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_0]] : memref<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_15:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_16:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_17:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_19:.*]] = memref.buffer_cast %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: %[[VAL_20:.*]] = memref.alloc() : memref<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: memref.copy %[[VAL_19]], %[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> to memref<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] {
|
||||
|
@ -81,7 +81,7 @@
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_50:.*]] = memref.tensor_load %[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: %[[VAL_50:.*]] = bufferization.to_tensor %[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: return %[[VAL_50]] : tensor<10x20x30x40x50x60x70x80xf32>
|
||||
// CHECK: }
|
||||
func @mul(%arga: tensor<10x20x30x40x50x60x70x80xf32>,
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_1]] : memref<20x30x10xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30x10xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.alloc() : memref<20x30x10xf32>
|
||||
// CHECK: memref.copy %[[VAL_8]], %[[VAL_9]] : memref<20x30x10xf32> to memref<20x30x10xf32>
|
||||
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
|
||||
|
@ -38,7 +38,7 @@
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_18:.*]] = memref.tensor_load %[[VAL_9]] : memref<20x30x10xf32>
|
||||
// CHECK: %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<20x30x10xf32>
|
||||
// CHECK: return %[[VAL_18]] : tensor<20x30x10xf32>
|
||||
// CHECK: }
|
||||
func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>,
|
||||
|
@ -62,7 +62,7 @@ func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>,
|
|||
// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?x?xf32>
|
||||
// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?x?xf32>
|
||||
// CHECK: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_2]] : tensor<?x?x?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<?x?x?xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?x?xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = memref.alloc(%[[VAL_6]], %[[VAL_7]], %[[VAL_8]]) : memref<?x?x?xf32>
|
||||
// CHECK: memref.copy %[[VAL_9]], %[[VAL_10]] : memref<?x?x?xf32> to memref<?x?x?xf32>
|
||||
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] {
|
||||
|
@ -77,7 +77,7 @@ func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>,
|
|||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_19:.*]] = memref.tensor_load %[[VAL_10]] : memref<?x?x?xf32>
|
||||
// CHECK: %[[VAL_19:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<?x?x?xf32>
|
||||
// CHECK: return %[[VAL_19]] : tensor<?x?x?xf32>
|
||||
// CHECK: }
|
||||
func @sparse_dynamic_dims(%arga: tensor<?x?x?xf32, #X>,
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
// CHECK-HIR: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK-HIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<f32>
|
||||
// CHECK-HIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
|
||||
// CHECK-HIR: %[[VAL_10:.*]] = memref.alloc() : memref<f32>
|
||||
// CHECK-HIR: memref.copy %[[VAL_9]], %[[VAL_10]] : memref<f32> to memref<f32>
|
||||
// CHECK-HIR: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref<f32>
|
||||
|
@ -46,7 +46,7 @@
|
|||
// CHECK-HIR: scf.yield %[[VAL_15]] : f32
|
||||
// CHECK-HIR: }
|
||||
// CHECK-HIR: memref.store %[[VAL_12]], %[[VAL_10]][] : memref<f32>
|
||||
// CHECK-HIR: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_10]] : memref<f32>
|
||||
// CHECK-HIR: %[[VAL_30:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<f32>
|
||||
// CHECK-HIR: return %[[VAL_30]] : tensor<f32>
|
||||
// CHECK-HIR: }
|
||||
//
|
||||
|
@ -60,7 +60,7 @@
|
|||
// CHECK-MIR: %[[VAL_6:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_3]]) : (!llvm.ptr<i8>, index) -> index
|
||||
// CHECK-MIR: %[[VAL_7:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_2]]) : (!llvm.ptr<i8>, index) -> index
|
||||
// CHECK-MIR: %[[VAL_8:.*]] = call @sparseValuesF32(%[[VAL_0]]) : (!llvm.ptr<i8>) -> memref<?xf32>
|
||||
// CHECK-MIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<f32>
|
||||
// CHECK-MIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
|
||||
// CHECK-MIR: %[[VAL_10:.*]] = memref.alloc() : memref<f32>
|
||||
// CHECK-MIR: memref.copy %[[VAL_9]], %[[VAL_10]] : memref<f32> to memref<f32>
|
||||
// CHECK-MIR: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref<f32>
|
||||
|
@ -80,7 +80,7 @@
|
|||
// CHECK-MIR: scf.yield %[[VAL_15]] : f32
|
||||
// CHECK-MIR: }
|
||||
// CHECK-MIR: memref.store %[[VAL_12]], %[[VAL_10]][] : memref<f32>
|
||||
// CHECK-MIR: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_10]] : memref<f32>
|
||||
// CHECK-MIR: %[[VAL_30:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<f32>
|
||||
// CHECK-MIR: return %[[VAL_30]] : tensor<f32>
|
||||
// CHECK-MIR: }
|
||||
func @sparse_dynamic_dims(%arga: tensor<?x?x?xf32, #X>,
|
||||
|
|
|
@ -33,8 +33,8 @@
|
|||
// CHECK: %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_7]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
|
||||
// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref<?xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = memref.buffer_cast %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_4]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_4]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_14]][] : memref<f32>
|
||||
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref<?xindex>
|
||||
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
|
||||
|
@ -56,7 +56,7 @@
|
|||
// CHECK: memref.store %[[VAL_33]], %[[VAL_15]]{{\[}}%[[VAL_20]], %[[VAL_25]]] : memref<32x16xf32>
|
||||
// CHECK: }
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_15]] : memref<32x16xf32>
|
||||
// CHECK: %[[VAL_34:.*]] = bufferization.to_tensor %[[VAL_15]] : memref<32x16xf32>
|
||||
// CHECK: return %[[VAL_34]] : tensor<32x16xf32>
|
||||
// CHECK: }
|
||||
func @mul(%arga: tensor<32x16xf32, #SparseMatrix>,
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>
|
||||
// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_0]] : memref<f64>
|
||||
// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_0]] : memref<f64>
|
||||
// CHECK: %[[VAL_16:.*]] = tensor.extract %[[VAL_0]][] : tensor<f64>
|
||||
// CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_8]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f64) {
|
||||
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xindex>
|
||||
|
@ -109,7 +109,7 @@
|
|||
// CHECK: scf.yield %[[VAL_84]] : f64
|
||||
// CHECK: }
|
||||
// CHECK: memref.store %[[VAL_86:.*]], %[[VAL_15]][] : memref<f64>
|
||||
// CHECK: %[[VAL_87:.*]] = memref.tensor_load %[[VAL_15]] : memref<f64>
|
||||
// CHECK: %[[VAL_87:.*]] = bufferization.to_tensor %[[VAL_15]] : memref<f64>
|
||||
// CHECK: return %[[VAL_87]] : tensor<f64>
|
||||
// CHECK: }
|
||||
func @sparse_matrix_sum(%argx: tensor<f64> {linalg.inplaceable = true},
|
||||
|
|
|
@ -4,10 +4,10 @@
|
|||
// CHECK-SAME: %[[PRED:.*]]: i1,
|
||||
// CHECK-SAME: %[[TRUE_VAL:.*]]: tensor<f32>,
|
||||
// CHECK-SAME: %[[FALSE_VAL:.*]]: tensor<f32>) -> tensor<f32> {
|
||||
// CHECK-DAG: %[[TRUE_VAL_MEMREF:.*]] = memref.buffer_cast %[[TRUE_VAL]] : memref<f32>
|
||||
// CHECK-DAG: %[[FALSE_VAL_MEMREF:.*]] = memref.buffer_cast %[[FALSE_VAL]] : memref<f32>
|
||||
// CHECK-DAG: %[[TRUE_VAL_MEMREF:.*]] = bufferization.to_memref %[[TRUE_VAL]] : memref<f32>
|
||||
// CHECK-DAG: %[[FALSE_VAL_MEMREF:.*]] = bufferization.to_memref %[[FALSE_VAL]] : memref<f32>
|
||||
// CHECK: %[[RET_MEMREF:.*]] = select %[[PRED]], %[[TRUE_VAL_MEMREF]], %[[FALSE_VAL_MEMREF]] : memref<f32>
|
||||
// CHECK: %[[RET:.*]] = memref.tensor_load %[[RET_MEMREF]] : memref<f32>
|
||||
// CHECK: %[[RET:.*]] = bufferization.to_tensor %[[RET_MEMREF]] : memref<f32>
|
||||
// CHECK: return %[[RET]] : tensor<f32>
|
||||
func @select(%arg0: i1, %arg1: tensor<f32>, %arg2: tensor<f32>) -> tensor<f32> {
|
||||
%0 = select %arg0, %arg1, %arg2 : tensor<f32>
|
||||
|
|
|
@ -39,7 +39,7 @@ func @call_sink(%arg0: tensor<f32>) {
|
|||
|
||||
// CHECK-LABEL: func @unconverted_op_in_body() -> memref<f32> {
|
||||
// CHECK: %[[TENSOR:.*]] = "test.source"() : () -> tensor<f32>
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<f32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
|
||||
// CHECK: return %[[MEMREF]] : memref<f32>
|
||||
func @unconverted_op_in_body() -> tensor<f32> {
|
||||
%0 = "test.source"() : () -> tensor<f32>
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
// CHECK: @basic
|
||||
func @basic() -> tensor<3x4xf32> {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.get_global @__constant_3x4xf32 : memref<3x4xf32>
|
||||
// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF]]
|
||||
// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[MEMREF]]
|
||||
%0 = arith.constant dense<7.0> : tensor<3x4xf32>
|
||||
// CHECK: return %[[TENSOR]]
|
||||
return %0 : tensor<3x4xf32>
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
// CHECK-LABEL: func @dim(
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<f32>,
|
||||
// CHECK-SAME: %[[INDEX:.*]]: index) -> index {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<f32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
|
||||
// CHECK: %[[EXTENT:.*]] = memref.dim %[[MEMREF]], %[[INDEX]] : memref<f32>
|
||||
// CHECK: return %[[EXTENT]] : index
|
||||
func @dim(%arg0: tensor<f32>, %arg1: index) -> index {
|
||||
|
@ -13,9 +13,9 @@ func @dim(%arg0: tensor<f32>, %arg1: index) -> index {
|
|||
|
||||
// CHECK-LABEL: func @tensor.cast(
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<?xindex>) -> tensor<2xindex> {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]]
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]]
|
||||
// CHECK: %[[CASTED:.*]] = memref.cast %[[MEMREF]] : memref<?xindex> to memref<2xindex>
|
||||
// CHECK: %[[RET:.*]] = memref.tensor_load %[[CASTED]]
|
||||
// CHECK: %[[RET:.*]] = bufferization.to_tensor %[[CASTED]]
|
||||
// CHECK: return %[[RET]] : tensor<2xindex>
|
||||
func @tensor.cast(%arg0: tensor<?xindex>) -> tensor<2xindex> {
|
||||
%0 = tensor.cast %arg0 : tensor<?xindex> to tensor<2xindex>
|
||||
|
@ -24,9 +24,9 @@ func @tensor.cast(%arg0: tensor<?xindex>) -> tensor<2xindex> {
|
|||
|
||||
// CHECK-LABEL: func @tensor.cast_from_unranked(
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<*xf32>) -> tensor<2xf32> {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<*xf32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<*xf32>
|
||||
// CHECK: %[[CASTED_MEMREF:.*]] = memref.cast %[[MEMREF]] : memref<*xf32> to memref<2xf32>
|
||||
// CHECK: %[[RET:.*]] = memref.tensor_load %[[CASTED_MEMREF]] : memref<2xf32>
|
||||
// CHECK: %[[RET:.*]] = bufferization.to_tensor %[[CASTED_MEMREF]] : memref<2xf32>
|
||||
// CHECK: return %[[RET]] : tensor<2xf32>
|
||||
func @tensor.cast_from_unranked(%arg0: tensor<*xf32>) -> tensor<2xf32> {
|
||||
%0 = tensor.cast %arg0 : tensor<*xf32> to tensor<2xf32>
|
||||
|
@ -35,9 +35,9 @@ func @tensor.cast_from_unranked(%arg0: tensor<*xf32>) -> tensor<2xf32> {
|
|||
|
||||
// CHECK-LABEL: func @tensor.cast_to_unranked(
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<2xf32>) -> tensor<*xf32> {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<2xf32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<2xf32>
|
||||
// CHECK: %[[CASTED_MEMREF:.*]] = memref.cast %[[MEMREF]] : memref<2xf32> to memref<*xf32>
|
||||
// CHECK: %[[RET:.*]] = memref.tensor_load %[[CASTED_MEMREF]] : memref<*xf32>
|
||||
// CHECK: %[[RET:.*]] = bufferization.to_tensor %[[CASTED_MEMREF]] : memref<*xf32>
|
||||
// CHECK: return %[[RET]] : tensor<*xf32>
|
||||
func @tensor.cast_to_unranked(%arg0: tensor<2xf32>) -> tensor<*xf32> {
|
||||
%0 = tensor.cast %arg0 : tensor<2xf32> to tensor<*xf32>
|
||||
|
@ -47,7 +47,7 @@ func @tensor.cast_to_unranked(%arg0: tensor<2xf32>) -> tensor<*xf32> {
|
|||
// CHECK-LABEL: func @tensor.extract(
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<?xf32>,
|
||||
// CHECK-SAME: %[[IDX:.*]]: index) -> f32 {
|
||||
// CHECK: %[[MEMREF:.*]] = memref.buffer_cast %[[TENSOR]] : memref<?xf32>
|
||||
// CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<?xf32>
|
||||
// CHECK: %[[RET:.*]] = memref.load %[[MEMREF]][%[[IDX]]] : memref<?xf32>
|
||||
// CHECK: return %[[RET]] : f32
|
||||
// CHECK: }
|
||||
|
@ -64,7 +64,7 @@ func @tensor.extract(%arg0: tensor<?xf32>, %arg1: index) -> f32 {
|
|||
// CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C0]]]
|
||||
// CHECK: %[[C1:.*]] = arith.constant 1 : index
|
||||
// CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C1]]]
|
||||
// CHECK: %[[RET:.*]] = memref.tensor_load %[[MEMREF]]
|
||||
// CHECK: %[[RET:.*]] = bufferization.to_tensor %[[MEMREF]]
|
||||
// CHECK: return %[[RET]] : tensor<2xindex>
|
||||
func @tensor.from_elements(%arg0: index, %arg1: index) -> tensor<2xindex> {
|
||||
%0 = tensor.from_elements %arg0, %arg1 : tensor<2xindex>
|
||||
|
@ -74,7 +74,7 @@ func @tensor.from_elements(%arg0: index, %arg1: index) -> tensor<2xindex> {
|
|||
// CHECK-LABEL: func @tensor.generate(
|
||||
// CHECK-SAME: %[[ARG:.*]]: tensor<*xf32>,
|
||||
// CHECK-SAME: %[[DYNAMIC_EXTENT:.*]]: index) -> tensor<?xindex> {
|
||||
// CHECK: %[[CASTED:.*]] = memref.buffer_cast %[[ARG]] : memref<*xf32>
|
||||
// CHECK: %[[CASTED:.*]] = bufferization.to_memref %[[ARG]] : memref<*xf32>
|
||||
// CHECK: %[[MEMREF:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) : memref<?xindex>
|
||||
// CHECK: %[[C0:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[C1:.*]] = arith.constant 1 : index
|
||||
|
@ -83,7 +83,7 @@ func @tensor.from_elements(%arg0: index, %arg1: index) -> tensor<2xindex> {
|
|||
// CHECK: store %[[ELEM]], %[[MEMREF]][%[[I]]] : memref<?xindex>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[RET:.*]] = memref.tensor_load %[[MEMREF]] : memref<?xindex>
|
||||
// CHECK: %[[RET:.*]] = bufferization.to_tensor %[[MEMREF]] : memref<?xindex>
|
||||
// CHECK: return %[[RET]] : tensor<?xindex>
|
||||
// CHECK: }
|
||||
func @tensor.generate(%arg: tensor<*xf32>, %dynamic_extent: index) -> tensor<?xindex> {
|
||||
|
@ -109,7 +109,7 @@ func @tensor.generate(%arg: tensor<*xf32>, %dynamic_extent: index) -> tensor<?xi
|
|||
// CHECK: store %[[VAL_7]], %[[MEMREF]][%[[I]], %[[J]]] : memref<16x?xindex>
|
||||
// CHECK: scf.yield
|
||||
// CHECK: }
|
||||
// CHECK: %[[RET:.*]] = memref.tensor_load %[[MEMREF]] : memref<16x?xindex>
|
||||
// CHECK: %[[RET:.*]] = bufferization.to_tensor %[[MEMREF]] : memref<16x?xindex>
|
||||
// CHECK: return %[[RET]] : tensor<16x?xindex>
|
||||
// CHECK: }
|
||||
func @tensor.generate_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> {
|
||||
|
|
|
@ -440,18 +440,18 @@ func @test_splat_op(%s : f32) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: func @tensor_load_store
|
||||
func @tensor_load_store(%0 : memref<4x4xi32>) {
|
||||
// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF:.*]] : memref<4x4xi32>
|
||||
%1 = memref.tensor_load %0 : memref<4x4xi32>
|
||||
func @tensor_load_store(%0 : memref<4x4xi32>, %1 : tensor<4x4xi32>) {
|
||||
// CHECK-SAME: (%[[MEMREF:.*]]: memref<4x4xi32>,
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<4x4xi32>)
|
||||
// CHECK: memref.tensor_store %[[TENSOR]], %[[MEMREF]] : memref<4x4xi32>
|
||||
memref.tensor_store %1, %0 : memref<4x4xi32>
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @unranked_tensor_load_store
|
||||
func @unranked_tensor_load_store(%0 : memref<*xi32>) {
|
||||
// CHECK: %[[TENSOR:.*]] = memref.tensor_load %[[MEMREF:.*]] : memref<*xi32>
|
||||
%1 = memref.tensor_load %0 : memref<*xi32>
|
||||
func @unranked_tensor_load_store(%0 : memref<*xi32>, %1 : tensor<*xi32>) {
|
||||
// CHECK-SAME: (%[[MEMREF:.*]]: memref<*xi32>,
|
||||
// CHECK-SAME: %[[TENSOR:.*]]: tensor<*xi32>)
|
||||
// CHECK: memref.tensor_store %[[TENSOR]], %[[MEMREF]] : memref<*xi32>
|
||||
memref.tensor_store %1, %0 : memref<*xi32>
|
||||
return
|
||||
|
|
|
@ -195,7 +195,7 @@ module {
|
|||
// CHECK: ( -4, -3, -2, -1, 0, 1, 2, 3, 4, 305 )
|
||||
//
|
||||
%c0 = call @sparse_cast_s32_to_f32(%1) : (tensor<10xi32, #SV>) -> tensor<10xf32>
|
||||
%m0 = memref.buffer_cast %c0 : memref<10xf32>
|
||||
%m0 = bufferization.to_memref %c0 : memref<10xf32>
|
||||
%v0 = vector.transfer_read %m0[%z], %f: memref<10xf32>, vector<10xf32>
|
||||
vector.print %v0 : vector<10xf32>
|
||||
|
||||
|
@ -203,7 +203,7 @@ module {
|
|||
// CHECK: ( 4.29497e+09, 4.29497e+09, 4.29497e+09, 4.29497e+09, 0, 1, 2, 3, 4, 305 )
|
||||
//
|
||||
%c1 = call @sparse_cast_u32_to_f32(%1) : (tensor<10xi32, #SV>) -> tensor<10xf32>
|
||||
%m1 = memref.buffer_cast %c1 : memref<10xf32>
|
||||
%m1 = bufferization.to_memref %c1 : memref<10xf32>
|
||||
%v1 = vector.transfer_read %m1[%z], %f: memref<10xf32>, vector<10xf32>
|
||||
vector.print %v1 : vector<10xf32>
|
||||
|
||||
|
@ -211,7 +211,7 @@ module {
|
|||
// CHECK: ( -4, -3, -2, -1, 0, 1, 2, 3, 4, 305 )
|
||||
//
|
||||
%c2 = call @sparse_cast_f32_to_s32(%3) : (tensor<10xf32, #SV>) -> tensor<10xi32>
|
||||
%m2 = memref.buffer_cast %c2 : memref<10xi32>
|
||||
%m2 = bufferization.to_memref %c2 : memref<10xi32>
|
||||
%v2 = vector.transfer_read %m2[%z], %i: memref<10xi32>, vector<10xi32>
|
||||
vector.print %v2 : vector<10xi32>
|
||||
|
||||
|
@ -219,7 +219,7 @@ module {
|
|||
// CHECK: ( 4294967295, 4294967294, 4294967293, 4294967292, 0, 1, 2, 3, 4, 305 )
|
||||
//
|
||||
%c3 = call @sparse_cast_f64_to_u32(%7) : (tensor<10xf64, #SV>) -> tensor<10xi32>
|
||||
%m3 = memref.buffer_cast %c3 : memref<10xi32>
|
||||
%m3 = bufferization.to_memref %c3 : memref<10xi32>
|
||||
%v3 = vector.transfer_read %m3[%z], %i: memref<10xi32>, vector<10xi32>
|
||||
%vu = vector.bitcast %v3 : vector<10xi32> to vector<10xui32>
|
||||
vector.print %vu : vector<10xui32>
|
||||
|
@ -228,7 +228,7 @@ module {
|
|||
// CHECK: ( -4.4, -3.3, -2.2, -1.1, 0, 1.1, 2.2, 3.3, 4.4, 305.5 )
|
||||
//
|
||||
%c4 = call @sparse_cast_f32_to_f64(%3) : (tensor<10xf32, #SV>) -> tensor<10xf64>
|
||||
%m4 = memref.buffer_cast %c4 : memref<10xf64>
|
||||
%m4 = bufferization.to_memref %c4 : memref<10xf64>
|
||||
%v4 = vector.transfer_read %m4[%z], %d: memref<10xf64>, vector<10xf64>
|
||||
vector.print %v4 : vector<10xf64>
|
||||
|
||||
|
@ -236,7 +236,7 @@ module {
|
|||
// CHECK: ( -4.4, -3.3, -2.2, -1.1, 0, 1.1, 2.2, 3.3, 4.4, 305.5 )
|
||||
//
|
||||
%c5 = call @sparse_cast_f64_to_f32(%5) : (tensor<10xf64, #SV>) -> tensor<10xf32>
|
||||
%m5 = memref.buffer_cast %c5 : memref<10xf32>
|
||||
%m5 = bufferization.to_memref %c5 : memref<10xf32>
|
||||
%v5 = vector.transfer_read %m5[%z], %f: memref<10xf32>, vector<10xf32>
|
||||
vector.print %v5 : vector<10xf32>
|
||||
|
||||
|
@ -244,7 +244,7 @@ module {
|
|||
// CHECK: ( -4, -3, -2, -1, 0, 1, 2, 3, 4, 305 )
|
||||
//
|
||||
%c6 = call @sparse_cast_s32_to_u64(%1) : (tensor<10xi32, #SV>) -> tensor<10xi64>
|
||||
%m6 = memref.buffer_cast %c6 : memref<10xi64>
|
||||
%m6 = bufferization.to_memref %c6 : memref<10xi64>
|
||||
%v6 = vector.transfer_read %m6[%z], %l: memref<10xi64>, vector<10xi64>
|
||||
vector.print %v6 : vector<10xi64>
|
||||
|
||||
|
@ -252,7 +252,7 @@ module {
|
|||
// CHECK: ( 4294967292, 4294967293, 4294967294, 4294967295, 0, 1, 2, 3, 4, 305 )
|
||||
//
|
||||
%c7 = call @sparse_cast_u32_to_s64(%1) : (tensor<10xi32, #SV>) -> tensor<10xi64>
|
||||
%m7 = memref.buffer_cast %c7 : memref<10xi64>
|
||||
%m7 = bufferization.to_memref %c7 : memref<10xi64>
|
||||
%v7 = vector.transfer_read %m7[%z], %l: memref<10xi64>, vector<10xi64>
|
||||
vector.print %v7 : vector<10xi64>
|
||||
|
||||
|
@ -260,7 +260,7 @@ module {
|
|||
// CHECK: ( -4, -3, -2, -1, 0, 1, 2, 3, 4, 49 )
|
||||
//
|
||||
%c8 = call @sparse_cast_i32_to_i8(%1) : (tensor<10xi32, #SV>) -> tensor<10xi8>
|
||||
%m8 = memref.buffer_cast %c8 : memref<10xi8>
|
||||
%m8 = bufferization.to_memref %c8 : memref<10xi8>
|
||||
%v8 = vector.transfer_read %m8[%z], %b: memref<10xi8>, vector<10xi8>
|
||||
vector.print %v8 : vector<10xi8>
|
||||
|
||||
|
@ -268,7 +268,7 @@ module {
|
|||
// CHECK: ( -1064514355, -1068289229, -1072902963, -1081291571, 0, 1066192077, 1074580685, 1079194419, 1082969293, 1134084096 )
|
||||
//
|
||||
%c9 = call @sparse_cast_f32_as_s32(%3) : (tensor<10xf32, #SV>) -> tensor<10xi32>
|
||||
%m9 = memref.buffer_cast %c9 : memref<10xi32>
|
||||
%m9 = bufferization.to_memref %c9 : memref<10xi32>
|
||||
%v9 = vector.transfer_read %m9[%z], %i: memref<10xi32>, vector<10xi32>
|
||||
vector.print %v9 : vector<10xi32>
|
||||
|
||||
|
|
|
@ -59,56 +59,56 @@ module {
|
|||
}
|
||||
func @dumpAndRelease_234(%arg0: tensor<2x3x4xf64>) {
|
||||
call @dump(%arg0) : (tensor<2x3x4xf64>) -> ()
|
||||
%1 = memref.buffer_cast %arg0 : memref<2x3x4xf64>
|
||||
%1 = bufferization.to_memref %arg0 : memref<2x3x4xf64>
|
||||
memref.dealloc %1 : memref<2x3x4xf64>
|
||||
return
|
||||
}
|
||||
func @dumpAndRelease_p34(%arg0: tensor<?x3x4xf64>) {
|
||||
%0 = tensor.cast %arg0 : tensor<?x3x4xf64> to tensor<2x3x4xf64>
|
||||
call @dump(%0) : (tensor<2x3x4xf64>) -> ()
|
||||
%1 = memref.buffer_cast %arg0 : memref<?x3x4xf64>
|
||||
%1 = bufferization.to_memref %arg0 : memref<?x3x4xf64>
|
||||
memref.dealloc %1 : memref<?x3x4xf64>
|
||||
return
|
||||
}
|
||||
func @dumpAndRelease_2p4(%arg0: tensor<2x?x4xf64>) {
|
||||
%0 = tensor.cast %arg0 : tensor<2x?x4xf64> to tensor<2x3x4xf64>
|
||||
call @dump(%0) : (tensor<2x3x4xf64>) -> ()
|
||||
%1 = memref.buffer_cast %arg0 : memref<2x?x4xf64>
|
||||
%1 = bufferization.to_memref %arg0 : memref<2x?x4xf64>
|
||||
memref.dealloc %1 : memref<2x?x4xf64>
|
||||
return
|
||||
}
|
||||
func @dumpAndRelease_23p(%arg0: tensor<2x3x?xf64>) {
|
||||
%0 = tensor.cast %arg0 : tensor<2x3x?xf64> to tensor<2x3x4xf64>
|
||||
call @dump(%0) : (tensor<2x3x4xf64>) -> ()
|
||||
%1 = memref.buffer_cast %arg0 : memref<2x3x?xf64>
|
||||
%1 = bufferization.to_memref %arg0 : memref<2x3x?xf64>
|
||||
memref.dealloc %1 : memref<2x3x?xf64>
|
||||
return
|
||||
}
|
||||
func @dumpAndRelease_2pp(%arg0: tensor<2x?x?xf64>) {
|
||||
%0 = tensor.cast %arg0 : tensor<2x?x?xf64> to tensor<2x3x4xf64>
|
||||
call @dump(%0) : (tensor<2x3x4xf64>) -> ()
|
||||
%1 = memref.buffer_cast %arg0 : memref<2x?x?xf64>
|
||||
%1 = bufferization.to_memref %arg0 : memref<2x?x?xf64>
|
||||
memref.dealloc %1 : memref<2x?x?xf64>
|
||||
return
|
||||
}
|
||||
func @dumpAndRelease_p3p(%arg0: tensor<?x3x?xf64>) {
|
||||
%0 = tensor.cast %arg0 : tensor<?x3x?xf64> to tensor<2x3x4xf64>
|
||||
call @dump(%0) : (tensor<2x3x4xf64>) -> ()
|
||||
%1 = memref.buffer_cast %arg0 : memref<?x3x?xf64>
|
||||
%1 = bufferization.to_memref %arg0 : memref<?x3x?xf64>
|
||||
memref.dealloc %1 : memref<?x3x?xf64>
|
||||
return
|
||||
}
|
||||
func @dumpAndRelease_pp4(%arg0: tensor<?x?x4xf64>) {
|
||||
%0 = tensor.cast %arg0 : tensor<?x?x4xf64> to tensor<2x3x4xf64>
|
||||
call @dump(%0) : (tensor<2x3x4xf64>) -> ()
|
||||
%1 = memref.buffer_cast %arg0 : memref<?x?x4xf64>
|
||||
%1 = bufferization.to_memref %arg0 : memref<?x?x4xf64>
|
||||
memref.dealloc %1 : memref<?x?x4xf64>
|
||||
return
|
||||
}
|
||||
func @dumpAndRelease_ppp(%arg0: tensor<?x?x?xf64>) {
|
||||
%0 = tensor.cast %arg0 : tensor<?x?x?xf64> to tensor<2x3x4xf64>
|
||||
call @dump(%0) : (tensor<2x3x4xf64>) -> ()
|
||||
%1 = memref.buffer_cast %arg0 : memref<?x?x?xf64>
|
||||
%1 = bufferization.to_memref %arg0 : memref<?x?x?xf64>
|
||||
memref.dealloc %1 : memref<?x?x?xf64>
|
||||
return
|
||||
}
|
||||
|
|
|
@ -79,7 +79,7 @@ module {
|
|||
// CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
|
||||
// CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
|
||||
//
|
||||
%m = memref.buffer_cast %0 : memref<6x6xi32>
|
||||
%m = bufferization.to_memref %0 : memref<6x6xi32>
|
||||
%v = vector.transfer_read %m[%c0, %c0], %i0
|
||||
: memref<6x6xi32>, vector<6x6xi32>
|
||||
vector.print %v : vector<6x6xi32>
|
||||
|
|
|
@ -86,7 +86,7 @@ module {
|
|||
memref.store %d0, %xdata[%i, %j] : memref<7x3xf64>
|
||||
}
|
||||
}
|
||||
%x = memref.tensor_load %xdata : memref<7x3xf64>
|
||||
%x = bufferization.to_tensor %xdata : memref<7x3xf64>
|
||||
|
||||
// Read the sparse tensor from file, construct sparse storage.
|
||||
%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
|
||||
|
@ -106,7 +106,7 @@ module {
|
|||
// CHECK: ( 0, 0, 0 )
|
||||
// CHECK: ( 7, 0, 0 )
|
||||
//
|
||||
%r = memref.buffer_cast %0 : memref<7x3xf64>
|
||||
%r = bufferization.to_memref %0 : memref<7x3xf64>
|
||||
scf.for %i = %c0 to %c7 step %c1 {
|
||||
%v = vector.transfer_read %r[%i, %c0], %d0: memref<7x3xf64>, vector<3xf64>
|
||||
vector.print %v : vector<3xf64>
|
||||
|
|
|
@ -114,7 +114,7 @@ module {
|
|||
%d0 = arith.constant 0.0 : f64
|
||||
%c0 = arith.constant 0 : index
|
||||
%dm = sparse_tensor.convert %arg0 : tensor<?x?xf64, #DCSR> to tensor<?x?xf64>
|
||||
%0 = memref.buffer_cast %dm : memref<?x?xf64>
|
||||
%0 = bufferization.to_memref %dm : memref<?x?xf64>
|
||||
%1 = vector.transfer_read %0[%c0, %c0], %d0: memref<?x?xf64>, vector<4x8xf64>
|
||||
vector.print %1 : vector<4x8xf64>
|
||||
memref.dealloc %0 : memref<?x?xf64>
|
||||
|
|
|
@ -94,8 +94,8 @@ module {
|
|||
scf.for %i = %c0 to %c4 step %c1 {
|
||||
memref.store %i0, %xdata[%i] : memref<?xi32>
|
||||
}
|
||||
%b = memref.tensor_load %bdata : memref<?xi32>
|
||||
%x = memref.tensor_load %xdata : memref<?xi32>
|
||||
%b = bufferization.to_tensor %bdata : memref<?xi32>
|
||||
%x = bufferization.to_tensor %xdata : memref<?xi32>
|
||||
|
||||
// Call kernel.
|
||||
%0 = call @kernel_matvec(%a, %b, %x)
|
||||
|
@ -105,7 +105,7 @@ module {
|
|||
//
|
||||
// CHECK: ( 889, 1514, -21, -3431 )
|
||||
//
|
||||
%m = memref.buffer_cast %0 : memref<?xi32>
|
||||
%m = bufferization.to_memref %0 : memref<?xi32>
|
||||
%v = vector.transfer_read %m[%c0], %i0: memref<?xi32>, vector<4xi32>
|
||||
vector.print %v : vector<4xi32>
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ module {
|
|||
memref.store %k, %cdata[%i, %j] : memref<?x?xf64>
|
||||
}
|
||||
}
|
||||
%c = memref.tensor_load %cdata : memref<?x?xf64>
|
||||
%c = bufferization.to_tensor %cdata : memref<?x?xf64>
|
||||
|
||||
%ddata = memref.alloc(%c4, %c5) : memref<?x?xf64>
|
||||
scf.for %i = %c0 to %c4 step %c1 {
|
||||
|
@ -112,7 +112,7 @@ module {
|
|||
memref.store %k, %ddata[%i, %j] : memref<?x?xf64>
|
||||
}
|
||||
}
|
||||
%d = memref.tensor_load %ddata : memref<?x?xf64>
|
||||
%d = bufferization.to_tensor %ddata : memref<?x?xf64>
|
||||
|
||||
%adata = memref.alloc(%c2, %c5) : memref<?x?xf64>
|
||||
scf.for %i = %c0 to %c2 step %c1 {
|
||||
|
@ -120,7 +120,7 @@ module {
|
|||
memref.store %i0, %adata[%i, %j] : memref<?x?xf64>
|
||||
}
|
||||
}
|
||||
%a = memref.tensor_load %adata : memref<?x?xf64>
|
||||
%a = bufferization.to_tensor %adata : memref<?x?xf64>
|
||||
|
||||
// Call kernel.
|
||||
%0 = call @kernel_mttkrp(%b, %c, %d, %a)
|
||||
|
@ -132,7 +132,7 @@ module {
|
|||
// CHECK: ( ( 16075, 21930, 28505, 35800, 43815 ),
|
||||
// CHECK: ( 10000, 14225, 19180, 24865, 31280 ) )
|
||||
//
|
||||
%m = memref.buffer_cast %0 : memref<?x?xf64>
|
||||
%m = bufferization.to_memref %0 : memref<?x?xf64>
|
||||
%v = vector.transfer_read %m[%c0, %c0], %i0
|
||||
: memref<?x?xf64>, vector<2x5xf64>
|
||||
vector.print %v : vector<2x5xf64>
|
||||
|
|
|
@ -81,7 +81,7 @@ module {
|
|||
// CHECK-SAME: ( -254, 0, 256, -300, -30, -6 ),
|
||||
// CHECK-SAME: ( 1397, 0, -1408, 100, 10, 33 ) )
|
||||
//
|
||||
%m = memref.buffer_cast %0 : memref<5x6xi32>
|
||||
%m = bufferization.to_memref %0 : memref<5x6xi32>
|
||||
%v = vector.transfer_read %m[%c0, %c0], %i0
|
||||
: memref<5x6xi32>, vector<5x6xi32>
|
||||
vector.print %v : vector<5x6xi32>
|
||||
|
|
|
@ -201,19 +201,19 @@ module {
|
|||
// CHECK: 15
|
||||
// CHECK: 10
|
||||
//
|
||||
%m0 = memref.buffer_cast %0 : memref<i32>
|
||||
%m0 = bufferization.to_memref %0 : memref<i32>
|
||||
call @dump_i32(%m0) : (memref<i32>) -> ()
|
||||
%m1 = memref.buffer_cast %1 : memref<f32>
|
||||
%m1 = bufferization.to_memref %1 : memref<f32>
|
||||
call @dump_f32(%m1) : (memref<f32>) -> ()
|
||||
%m2 = memref.buffer_cast %2 : memref<i32>
|
||||
%m2 = bufferization.to_memref %2 : memref<i32>
|
||||
call @dump_i32(%m2) : (memref<i32>) -> ()
|
||||
%m3 = memref.buffer_cast %3 : memref<f32>
|
||||
%m3 = bufferization.to_memref %3 : memref<f32>
|
||||
call @dump_f32(%m3) : (memref<f32>) -> ()
|
||||
%m4 = memref.buffer_cast %4 : memref<i32>
|
||||
%m4 = bufferization.to_memref %4 : memref<i32>
|
||||
call @dump_i32(%m4) : (memref<i32>) -> ()
|
||||
%m5 = memref.buffer_cast %5 : memref<i32>
|
||||
%m5 = bufferization.to_memref %5 : memref<i32>
|
||||
call @dump_i32(%m5) : (memref<i32>) -> ()
|
||||
%m6 = memref.buffer_cast %6 : memref<i32>
|
||||
%m6 = bufferization.to_memref %6 : memref<i32>
|
||||
call @dump_i32(%m6) : (memref<i32>) -> ()
|
||||
|
||||
// Release the resources.
|
||||
|
|
|
@ -97,9 +97,9 @@ module {
|
|||
memref.store %d, %bdata[%j, %i] : memref<?x?xf32>
|
||||
}
|
||||
}
|
||||
%a = memref.tensor_load %adata : memref<?x?xf32>
|
||||
%b = memref.tensor_load %bdata : memref<?x?xf32>
|
||||
%x = memref.tensor_load %xdata : memref<?x?xf32>
|
||||
%a = bufferization.to_tensor %adata : memref<?x?xf32>
|
||||
%b = bufferization.to_tensor %bdata : memref<?x?xf32>
|
||||
%x = bufferization.to_tensor %xdata : memref<?x?xf32>
|
||||
|
||||
// Read the sparse matrix from file, construct sparse storage.
|
||||
%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
|
||||
|
@ -118,7 +118,7 @@ module {
|
|||
// CHECK: ( 164, 0, 0, 640, 0 )
|
||||
// CHECK: ( 0, 520, 0, 0, 1250 )
|
||||
//
|
||||
%r = memref.buffer_cast %0 : memref<?x?xf32>
|
||||
%r = bufferization.to_memref %0 : memref<?x?xf32>
|
||||
scf.for %i = %c0 to %c5 step %c1 {
|
||||
%v = vector.transfer_read %r[%i, %c0], %d0: memref<?x?xf32>, vector<5xf32>
|
||||
vector.print %v : vector<5xf32>
|
||||
|
|
|
@ -156,8 +156,8 @@ module {
|
|||
// CHECK-SAME: ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ),
|
||||
// CHECK-SAME: ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 192 ) )
|
||||
//
|
||||
%m0 = memref.buffer_cast %0 : memref<8x8xf64>
|
||||
%m1 = memref.buffer_cast %1 : memref<8x8xf64>
|
||||
%m0 = bufferization.to_memref %0 : memref<8x8xf64>
|
||||
%m1 = bufferization.to_memref %1 : memref<8x8xf64>
|
||||
%v0 = vector.transfer_read %m0[%c0, %c0], %d0
|
||||
: memref<8x8xf64>, vector<8x8xf64>
|
||||
%v1 = vector.transfer_read %m1[%c0, %c0], %d0
|
||||
|
|
|
@ -97,8 +97,8 @@ module {
|
|||
memref.store %i0, %xdata[%i, %j] : memref<?x?xf64>
|
||||
}
|
||||
}
|
||||
%b = memref.tensor_load %bdata : memref<?x?xf64>
|
||||
%x = memref.tensor_load %xdata : memref<?x?xf64>
|
||||
%b = bufferization.to_tensor %bdata : memref<?x?xf64>
|
||||
%x = bufferization.to_tensor %xdata : memref<?x?xf64>
|
||||
|
||||
// Call kernel.
|
||||
%0 = call @kernel_spmm(%a, %b, %x)
|
||||
|
@ -108,7 +108,7 @@ module {
|
|||
//
|
||||
// CHECK: ( ( 3548, 3550, 3552, 3554 ), ( 6052, 6053, 6054, 6055 ), ( -56, -63, -70, -77 ), ( -13704, -13709, -13714, -13719 ) )
|
||||
//
|
||||
%m = memref.buffer_cast %0 : memref<?x?xf64>
|
||||
%m = bufferization.to_memref %0 : memref<?x?xf64>
|
||||
%v = vector.transfer_read %m[%c0, %c0], %i0: memref<?x?xf64>, vector<4x4xf64>
|
||||
vector.print %v : vector<4x4xf64>
|
||||
|
||||
|
|
|
@ -73,7 +73,7 @@ module {
|
|||
// initialized to zero.
|
||||
%xdata = memref.alloc() : memref<f64>
|
||||
memref.store %d0, %xdata[] : memref<f64>
|
||||
%x = memref.tensor_load %xdata : memref<f64>
|
||||
%x = bufferization.to_tensor %xdata : memref<f64>
|
||||
|
||||
// Read the sparse matrix from file, construct sparse storage.
|
||||
%fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
|
||||
|
@ -87,7 +87,7 @@ module {
|
|||
//
|
||||
// CHECK: 30.2
|
||||
//
|
||||
%m = memref.buffer_cast %0 : memref<f64>
|
||||
%m = bufferization.to_memref %0 : memref<f64>
|
||||
%v = memref.load %m[] : memref<f64>
|
||||
vector.print %v : f64
|
||||
|
||||
|
|
|
@ -154,7 +154,7 @@ module {
|
|||
vector.print %1 : vector<16xf64>
|
||||
// Dump the dense vector to verify structure is correct.
|
||||
%dv = sparse_tensor.convert %arg0 : tensor<?xf64, #SparseVector> to tensor<?xf64>
|
||||
%2 = memref.buffer_cast %dv : memref<?xf64>
|
||||
%2 = bufferization.to_memref %dv : memref<?xf64>
|
||||
%3 = vector.transfer_read %2[%c0], %d0: memref<?xf64>, vector<32xf64>
|
||||
vector.print %3 : vector<32xf64>
|
||||
memref.dealloc %2 : memref<?xf64>
|
||||
|
@ -181,7 +181,7 @@ module {
|
|||
// Setup memory for a single reduction scalar.
|
||||
%xdata = memref.alloc() : memref<f64>
|
||||
memref.store %d1, %xdata[] : memref<f64>
|
||||
%x = memref.tensor_load %xdata : memref<f64>
|
||||
%x = bufferization.to_tensor %xdata : memref<f64>
|
||||
|
||||
// Call sparse vector kernels.
|
||||
%0 = call @vector_scale(%sv1)
|
||||
|
@ -228,7 +228,7 @@ module {
|
|||
%m4 = sparse_tensor.values %4 : tensor<?xf64, #DenseVector> to memref<?xf64>
|
||||
%v4 = vector.load %m4[%c0]: memref<?xf64>, vector<32xf64>
|
||||
vector.print %v4 : vector<32xf64>
|
||||
%m5 = memref.buffer_cast %5 : memref<f64>
|
||||
%m5 = bufferization.to_memref %5 : memref<f64>
|
||||
%v5 = memref.load %m5[] : memref<f64>
|
||||
vector.print %v5 : f64
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff.