[mlir][NFC] Move SubTensorOp and SubTensorInsertOp to TensorDialect

The main goal of this commit is to remove the dependency of the Standard dialect on the Tensor dialect.

* Rename ops: SubTensorOp --> ExtractSliceOp, SubTensorInsertOp --> InsertSliceOp (see the sketch after this list)
* Some helper functions are (already) duplicated between the Tensor dialect and the MemRef dialect. To keep this commit smaller, this will be cleaned up in a separate commit.
* Additional dialect dependencies: Shape --> Tensor, Tensor --> Standard
* Remove dialect dependencies: Standard --> Tensor
* Move canonicalization test cases to correct dialect (Tensor/MemRef).
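
For illustration, a minimal before/after sketch of the rename (operand names and shapes below are made up, not taken from the commit):

```
// Before this commit (ops in the Standard dialect):
%1 = subtensor %0[0, 0][4, 64][1, 1] : tensor<24x64xi8> to tensor<4x64xi8>
%2 = subtensor_insert %1 into %0[0, 0][4, 64][1, 1] : tensor<4x64xi8> into tensor<24x64xi8>

// After this commit (ops in the Tensor dialect):
%1 = tensor.extract_slice %0[0, 0][4, 64][1, 1] : tensor<24x64xi8> to tensor<4x64xi8>
%2 = tensor.insert_slice %1 into %0[0, 0][4, 64][1, 1] : tensor<4x64xi8> into tensor<24x64xi8>
```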

Differential Revision: https://reviews.llvm.org/D104499
Author: Matthias Springer
Date: 2021-06-22 00:03:47 +09:00
Commit: 83bf801f5f (parent 64b2676ca8)
58 changed files with 1851 additions and 1818 deletions


@ -367,7 +367,8 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> {
let dependentDialects = [
"memref::MemRefDialect",
"StandardOpsDialect",
"scf::SCFDialect"
"scf::SCFDialect",
"tensor::TensorDialect"
];
}
@ -504,7 +505,7 @@ def TosaToSCF : Pass<"tosa-to-scf"> {
def TosaToStandard : Pass<"tosa-to-standard"> {
let summary = "Lower TOSA to the Standard dialect";
let dependentDialects = ["StandardOpsDialect"];
let dependentDialects = ["StandardOpsDialect", "tensor::TensorDialect"];
let description = [{
Pass that converts TOSA operations to the equivalent operations using the
operations in the Standard dialect.
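
As a hedged sketch of why the Tensor dialect becomes a dependent dialect of this pass: `tosa.slice` now lowers to `tensor.extract_slice` (the attribute values below are illustrative, not from the commit):

```
// Input: a tosa.slice with static start/size attributes.
%0 = "tosa.slice"(%arg0) {start = [1, 2], size = [2, 3]}
    : (tensor<4x8xf32>) -> tensor<2x3xf32>

// After TosaToStandard: an extract_slice with unit strides.
%0 = tensor.extract_slice %arg0[1, 2] [2, 3] [1, 1]
    : tensor<4x8xf32> to tensor<2x3xf32>
```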


@ -579,12 +579,11 @@ def Linalg_TiledLoopOp : Linalg_Op<"tiled_loop", [
Tensor-based version:
The body region of the loop contains `subtensor` operations applied to
The body region of the loop contains `extract_slice` operations applied to
every tensor argument of TiledLoopOp.
The body region must contain exactly one block that terminates with
`linalg.yield` with the operands resulting from `subtensor_insert`
operations.
`linalg.yield` with the operands resulting from `insert_slice` operations.
Example:
@ -594,16 +593,16 @@ def Linalg_TiledLoopOp : Linalg_Op<"tiled_loop", [
outs(%out : tensor<24x64xi8>)
iterators("parallel")
distribution("block_x") {
%lhs_sub = subtensor %lhs[%i, 0] [%c4, %c64] [1, 1]
%lhs_sub = tensor.extract_slice %lhs[%i, 0] [%c4, %c64] [1, 1]
: tensor<24x64xi8> to tensor<?x?xi8>
%rhs_sub = subtensor %rhs[%i, 0] [%c4, %c64] [1, 1]
%rhs_sub = tensor.extract_slice %rhs[%i, 0] [%c4, %c64] [1, 1]
: tensor<24x64xi8> to tensor<?x?xi8>
%out_sub = subtensor %out[%i, 0] [%c4, %c64] [1, 1]
%out_sub = tensor.extract_slice %out[%i, 0] [%c4, %c64] [1, 1]
: tensor<24x64xi8> to tensor<?x?xi8>
%result_sub = linalg.generic ...
%result = subtensor_insert %result_sub into %out[%i, 0][%c4, %c64][1, 1]
%result = tensor.insert_slice %result_sub into %out[%i, 0][%c4, %c64][1, 1]
: tensor<?x?xi8> into tensor<24x64xi8>
linalg.yield %result : tensor<24x64xi8>
}


@ -47,7 +47,7 @@ void hoistRedundantVectorTransfersOnTensor(FuncOp func);
/// If hoistPaddingOnTensors is called with `nLoops` = 2 on the following IR.
/// ```
/// scf.for (%i, %j, %k)
/// %st0 = subtensor f(%i, %k) : ... to tensor<?x?xf32>
/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
/// %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
/// ^bb0( ... ):
/// linalg.yield %pad
@ -61,16 +61,17 @@ void hoistRedundantVectorTransfersOnTensor(FuncOp func);
/// scf.for (%i) {
/// %packed_init = linalg.init_tensor range(%j) : tensor<?x4x8xf32>
/// %packed = scf.for (%k) iter_args(%p : %packed_init) {
/// %st0 = subtensor f(%i, %k) : ... to tensor<?x?xf32>
/// %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
/// %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
/// ^bb0( ... ):
/// linalg.yield %pad
/// } : tensor<?x?xf32> to tensor<4x8xf32>
/// %1 = subtensor_insert %0 ... : tensor<4x8xf32> to tensor<?x4x8xf32>
/// %1 = tensor.insert_slice %0 ...
/// : tensor<4x8xf32> to tensor<?x4x8xf32>
/// scf.yield %1: tensor<?x4x8xf32>
/// } -> tensor<?x4x8xf32>
/// scf.for (%j, %k) {
/// %st0 = subtensor %packed [%k, 0, 0][1, 4, 8][1, 1, 1] :
/// %st0 = tensor.extract_slice %packed [%k, 0, 0][1, 4, 8][1, 1, 1] :
/// tensor<?x4x8xf32> to tensor<4x8xf32>
/// compute(%st0)
/// }


@ -12,6 +12,7 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/Identifier.h"
#include "mlir/IR/PatternMatch.h"
@ -1077,12 +1078,12 @@ LogicalResult applyStagedPatterns(
const FrozenRewritePatternSet &stage2Patterns,
function_ref<LogicalResult(Operation *)> stage3Lambda = nullptr);
/// Rewrite subtensor(pad_tensor(x)) into pad_tensor(subtensor(x)).
struct SubTensorOfPadTensorSwapPattern
: public OpRewritePattern<SubTensorOp> {
using OpRewritePattern<SubTensorOp>::OpRewritePattern;
/// Rewrite extract_slice(pad_tensor(x)) into pad_tensor(extract_slice(x)).
struct ExtractSliceOfPadTensorSwapPattern
: public OpRewritePattern<tensor::ExtractSliceOp> {
using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SubTensorOp subTensorOp,
LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
PatternRewriter &rewriter) const override;
};
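
For context, a schematic sketch of the swap this pattern performs (shapes and the padding value `%pad` are illustrative, and the slice here happens to lie entirely inside the unpadded source):

```
// Before: slice of a padded tensor.
%0 = linalg.pad_tensor %t low[0, 0] high[5, 5] {
^bb0(%i: index, %j: index):
  linalg.yield %pad : f32
} : tensor<10x10xf32> to tensor<15x15xf32>
%1 = tensor.extract_slice %0[2, 2][6, 6][1, 1] : tensor<15x15xf32> to tensor<6x6xf32>

// After: slice the source first, then pad (here with nothing left to pad).
%s = tensor.extract_slice %t[2, 2][6, 6][1, 1] : tensor<10x10xf32> to tensor<6x6xf32>
%1 = linalg.pad_tensor %s low[0, 0] high[0, 0] {
^bb0(%i: index, %j: index):
  linalg.yield %pad : f32
} : tensor<6x6xf32> to tensor<6x6xf32>
```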


@ -78,7 +78,7 @@ bool isProducerLastWriteOfView(const LinalgDependenceGraph &graph,
bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer,
Value consumedView, LinalgOp producer);
/// Creates subtensor/subview ops for all `tiledOperands` of the given
/// Creates extract_slice/subview ops for all `tiledOperands` of the given
/// `linalgOp` with `builder`, assuming `linalgOp` is being fused into a loop
/// nest for tiling with the given induction variables `ivs` and tile sizes
/// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the
@ -118,15 +118,17 @@ Optional<FusionInfo> fuseProducerOfBuffer(OpBuilder &b,
const LinalgDependenceGraph &graph);
/// Tensor counterpart of `fuseProducerOfBuffer`.
/// This implements the fusion part of the "tileAndFuse on tensors"
/// transformation and thus requires the `consumerOpOperand` to be a `subtensor`
/// op (generally obtained by applying the tiling transformation).
/// transformation and thus requires the `consumerOpOperand` to be an
/// `extract_slice` op (generally obtained by applying the tiling
/// transformation).
Optional<FusionInfo> fuseProducerOfTensor(OpBuilder &b,
OpOperand &consumerOpOperand);
/// Tensor counterpart of `fuseProducerOfBuffer`.
/// This implements the fusion part of the "tileAndFuse on tensors"
/// transformation and thus requires the `consumerOpOperand` to be a `subtensor`
/// op (generally obtained by applying the tiling transformation).
/// Assumes `producerOfTensor` is a Linalg op that produces `consumerOpOperand`.
/// transformation and thus requires the `consumerOpOperand` to be an
/// `extract_slice` op (generally obtained by applying the tiling
/// transformation). Assumes `producerOfTensor` is a Linalg op that produces
/// `consumerOpOperand`.
Optional<FusionInfo> fuseProducerOfTensor(OpBuilder &b,
OpResult producerOpResult,
OpOperand &consumerOpOperand);
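
A hedged sketch of the expected input for the tensor variant (names and shapes are illustrative): the consumer reads its operand through an `extract_slice` of the producer's result, typically created by tiling.

```
%p = linalg.generic ... -> tensor<24x64xf32>                // producer
%s = tensor.extract_slice %p[%i, 0][4, 64][1, 1]            // consumerOpOperand source
    : tensor<24x64xf32> to tensor<4x64xf32>
%c = linalg.generic ... ins(%s : tensor<4x64xf32>) ...      // consumer
```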


@ -14,6 +14,7 @@
#ifndef MLIR_SHAPE_IR_SHAPE_H
#define MLIR_SHAPE_IR_SHAPE_H
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"


@ -35,6 +35,7 @@ def ShapeDialect : Dialect {
}];
let cppNamespace = "::mlir::shape";
let dependentDialects = ["tensor::TensorDialect"];
let hasConstantMaterializer = 1;
let hasOperationAttrVerify = 1;


@ -23,7 +23,6 @@
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/VectorInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
// Pull in all enum type definitions and utility function declarations.
#include "mlir/Dialect/StandardOps/IR/OpsEnums.h.inc"
@ -34,12 +33,6 @@ class Builder;
class FuncOp;
class OpBuilder;
class PatternRewriter;
/// Return the list of Range (i.e. offset, size, stride). Each Range
/// entry contains either the dynamic value or a ConstantIndexOp constructed
/// with `b` at location `loc`.
SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op,
OpBuilder &b, Location loc);
} // namespace mlir
#define GET_OP_CLASSES


@ -21,7 +21,6 @@ include "mlir/Interfaces/CastInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/VectorInterfaces.td"
include "mlir/Interfaces/ViewLikeInterface.td"
def StandardOps_Dialect : Dialect {
let name = "std";
@ -1754,245 +1753,6 @@ def SubIOp : IntBinaryOp<"subi"> {
let hasCanonicalizer = 1;
}
//===----------------------------------------------------------------------===//
// SubTensorOp
//===----------------------------------------------------------------------===//
def SubTensorOp : BaseOpWithOffsetSizesAndStrides<
StandardOps_Dialect, "subtensor", [NoSideEffect, AttrSizedOperandSegments,
OffsetSizeAndStrideOpInterface]> {
let summary = "subtensor operation";
let description = [{
The "subtensor" operation extract a tensor from another tensor as
specified by the operation's offsets, sizes and strides arguments.
The subtensor operation supports the following arguments:
* source: the "base" tensor from which to extract a subtensor.
* offsets: tensor-rank number of offsets into the "base" tensor from which
to extract the subtensor.
* sizes: tensor-rank number of sizes which specify the sizes of the result
tensor type.
* strides: tensor-rank number of strides specifying subsampling in each
dimension.
The representation based on offsets, sizes and strides support a
partially-static specification via attributes specified through the
`static_offsets`, `static_sizes` and `static_strides` arguments. A special
sentinel value ShapedType::kDynamicSize and
ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has
a dynamic value.
After buffer-allocation, the "subtensor" op is expected to lower into a
"subview" op.
A subtensor operation may additionally reduce the rank of the resulting
tensor by removing dimensions that are statically known to be of size 1.
Example:
```
// Rank-reducing subtensor.
%1 = subtensor %0[0, 0, 0][1, 16, 4][1, 1, 1] :
tensor<8x16x4xf32> to tensor<16x4xf32>
%3 = subtensor %2[3, 4, 2][1, 6, 3][1, 1, 1] :
tensor<8x16x4xf32> to tensor<6x3xf32>
```
}];
let arguments = (ins
AnyRankedTensor:$source,
Variadic<Index>:$offsets,
Variadic<Index>:$sizes,
Variadic<Index>:$strides,
I64ArrayAttr:$static_offsets,
I64ArrayAttr:$static_sizes,
I64ArrayAttr:$static_strides
);
let results = (outs AnyRankedTensor:$result);
let assemblyFormat = [{
$source ``
custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
attr-dict `:` type($source) `to` type($result)
}];
let builders = [
// Build a SubTensorOp with mixed static and dynamic entries and inferred
// result type.
OpBuilder<(ins "Value":$source, "ArrayRef<OpFoldResult>":$offsets,
"ArrayRef<OpFoldResult>":$sizes, "ArrayRef<OpFoldResult>":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Build a SubTensorOp with mixed static and dynamic entries and custom
// result type. If the type passed is nullptr, it is inferred.
OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source,
"ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
"ArrayRef<OpFoldResult>":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Build a SubTensorOp with dynamic entries and custom result type. If the
// type passed is nullptr, it is inferred.
OpBuilder<(ins "Value":$source, "ValueRange":$offsets,
"ValueRange":$sizes, "ValueRange":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Build a SubTensorOp with dynamic entries and inferred result type.
OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source,
"ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
];
let extraClassDeclaration = extraBaseClassDeclaration # [{
/// Returns the type of the base tensor operand.
RankedTensorType getSourceType() {
return source().getType().cast<RankedTensorType>();
}
/// The result of a subtensor is always a tensor.
RankedTensorType getType() {
return getResult().getType().cast<RankedTensorType>();
}
/// A subtensor result type can be fully inferred from the source type and
/// the static representation of offsets, sizes and strides. Special
/// sentinels encode the dynamic case.
static Type inferResultType(RankedTensorType sourceRankedTensorType,
ArrayRef<int64_t> staticOffsets,
ArrayRef<int64_t> staticSizes,
ArrayRef<int64_t> staticStrides);
static Type inferResultType(RankedTensorType sourceRankedTensorType,
ArrayRef<OpFoldResult> staticOffsets,
ArrayRef<OpFoldResult> staticSizes,
ArrayRef<OpFoldResult> staticStrides);
static Type inferRankReducedResultType(unsigned resultRank,
RankedTensorType sourceRankedTensorType,
ArrayRef<int64_t> staticOffsets,
ArrayRef<int64_t> staticSizes,
ArrayRef<int64_t> staticStrides);
static Type inferRankReducedResultType(unsigned resultRank,
RankedTensorType sourceRankedTensorType,
ArrayRef<OpFoldResult> staticOffsets,
ArrayRef<OpFoldResult> staticSizes,
ArrayRef<OpFoldResult> staticStrides);
/// Return the expected rank of each of the`static_offsets`, `static_sizes`
/// and `static_strides` attributes.
std::array<unsigned, 3> getArrayAttrMaxRanks() {
unsigned rank = getSourceType().getRank();
return {rank, rank, rank};
}
/// Return the number of leading operands before the `offsets`, `sizes` and
/// and `strides` operands.
static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
//===----------------------------------------------------------------------===//
// SubTensorInsertOp
//===----------------------------------------------------------------------===//
def SubTensorInsertOp : BaseOpWithOffsetSizesAndStrides<
StandardOps_Dialect, "subtensor_insert",
[NoSideEffect, AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface,
TypesMatchWith<"expected result type to match dest type",
"dest", "result", "$_self">]> {
let summary = "subtensor_insert operation";
let description = [{
The "subtensor_insert" operation insert a tensor `source` into another
tensor `dest` as specified by the operation's offsets, sizes and strides
arguments.
It returns a copy of `dest` with the proper subtensor updated with the value
of `source`.
The subtensor_insert operation has the encodes the following information:
* source: the tensor that is inserted.
* dest: the tensor into which the source tensor is inserted.
* offsets: tensor-rank number of offsets into the "base" tensor from which
to extract the subtensor.
* sizes: tensor-rank number of sizes which specify the sizes of the result
tensor type.
* strides: tensor-rank number of strides that specify subsampling in each
dimension.
The representation based on offsets, sizes and strides support a
partially-static specification via attributes specified through the
`static_offsets`, `static_sizes` and `static_strides` arguments. A special
sentinel value ShapedType::kDynamicSize and
ShapedType::kDynamicStrideOrOffset encodes that the corresponding entry has
a dynamic value.
After buffer-allocation, the "subtensor_insert" op is expected to become
an in-place buffer update.
}];
let arguments = (ins
AnyRankedTensor:$source,
AnyRankedTensor:$dest,
Variadic<Index>:$offsets,
Variadic<Index>:$sizes,
Variadic<Index>:$strides,
I64ArrayAttr:$static_offsets,
I64ArrayAttr:$static_sizes,
I64ArrayAttr:$static_strides
);
let results = (outs AnyRankedTensor:$result);
let assemblyFormat = [{
$source `into` $dest ``
custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
attr-dict `:` type($source) `into` type($dest)
}];
let verifier = ?;
let builders = [
// Build a SubTensorInsertOp with mixed static and dynamic entries.
OpBuilder<(ins "Value":$source, "Value":$dest,
"ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
"ArrayRef<OpFoldResult>":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Build a SubTensorInsertOp with dynamic entries.
OpBuilder<(ins "Value":$source, "Value":$dest,
"ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
];
let extraClassDeclaration = extraBaseClassDeclaration # [{
/// Returns the type of the base tensor operand.
RankedTensorType getSourceType() {
return source().getType().cast<RankedTensorType>();
}
/// The result of a subtensor_insert is always a tensor.
RankedTensorType getType() {
return getResult().getType().cast<RankedTensorType>();
}
/// Return the expected rank of each of the`static_offsets`, `static_sizes`
/// and `static_strides` attributes.
std::array<unsigned, 3> getArrayAttrMaxRanks() {
unsigned rank = getType().getRank();
return {rank, rank, rank};
}
/// Return the number of leading operands before the `offsets`, `sizes` and
/// and `strides` operands.
static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 2; }
}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
//===----------------------------------------------------------------------===//
// SwitchOp
//===----------------------------------------------------------------------===//


@ -16,6 +16,21 @@
#include "mlir/Interfaces/CastInterfaces.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
//===----------------------------------------------------------------------===//
// Tensor Dialect Helpers
//===----------------------------------------------------------------------===//
namespace mlir {
/// Return the list of Range (i.e. offset, size, stride). Each Range
/// entry contains either the dynamic value or a ConstantIndexOp constructed
/// with `b` at location `loc`.
SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op,
OpBuilder &b, Location loc);
} // namespace mlir
//===----------------------------------------------------------------------===//
// Tensor Dialect
@ -41,8 +56,8 @@ namespace tensor {
/// source tensor. This is useful to fold a tensor.cast into a consuming op and
/// implement canonicalization patterns for ops in different dialects that may
/// consume the results of tensor.cast operations. Such foldable tensor.cast
/// operations are typically inserted as `subtensor` ops and are canonicalized,
/// to preserve the type compatibility of their uses.
/// operations are typically inserted as `extract_slice` ops and are
/// canonicalized, to preserve the type compatibility of their uses.
///
/// Returns true when all conditions are met:
/// 1. source and result are ranked tensors with same element type and rank.
@ -64,7 +79,6 @@ bool canFoldIntoConsumerOp(CastOp castOp);
/// Performs folding of any operand of `op` if it comes from a tensor::CastOp
/// that can be folded.
LogicalResult foldTensorCast(Operation *op);
} // namespace tensor
} // namespace mlir
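
As an illustrative sketch of `canFoldIntoConsumerOp` (shapes made up): a cast to a more dynamic type can be folded away by consuming ops such as `extract_slice`.

```
// The cast goes from a static to a more dynamic type ...
%c = tensor.cast %t : tensor<4x8xf32> to tensor<?x?xf32>
%r = tensor.extract_slice %c[0, 0][2, 4][1, 1] : tensor<?x?xf32> to tensor<2x4xf32>

// ... so the consumer can use the original, more static source directly:
%r = tensor.extract_slice %t[0, 0][2, 4][1, 1] : tensor<4x8xf32> to tensor<2x4xf32>
```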


@ -13,6 +13,7 @@ include "mlir/Dialect/Tensor/IR/TensorBase.td"
include "mlir/Interfaces/CastInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ViewLikeInterface.td"
class Tensor_Op<string mnemonic, list<OpTrait> traits = []>
: Op<Tensor_Dialect, mnemonic, traits> {
@ -99,6 +100,144 @@ def Tensor_ExtractOp : Tensor_Op<"extract",
let hasFolder = 1;
}
//===----------------------------------------------------------------------===//
// ExtractSliceOp
//===----------------------------------------------------------------------===//
def Tensor_ExtractSliceOp : BaseOpWithOffsetSizesAndStrides<
Tensor_Dialect, "extract_slice", [NoSideEffect, AttrSizedOperandSegments,
OffsetSizeAndStrideOpInterface]> {
let summary = "extract slice operation";
let description = [{
The "extract_slice" operation extract a tensor from another tensor as
specified by the operation's offsets, sizes and strides arguments.
The extract_slice operation supports the following arguments:
* source: the "base" tensor from which to extract a slice.
* offsets: tensor-rank number of offsets into the "base" tensor from which
to extract the slice.
* sizes: tensor-rank number of sizes which specify the sizes of the result
tensor type.
* strides: tensor-rank number of strides specifying subsampling in each
dimension.
The representation based on offsets, sizes and strides supports a
partially-static specification via attributes specified through the
`static_offsets`, `static_sizes` and `static_strides` arguments. The special
sentinel values ShapedType::kDynamicSize and
ShapedType::kDynamicStrideOrOffset encode that the corresponding entry has
a dynamic value.
After buffer-allocation, the "extract_slice" op is expected to lower into a
"subview" op.
An extract_slice operation may additionally reduce the rank of the resulting
tensor by removing dimensions that are statically known to be of size 1.
Example:
```
// Rank-reducing extract_slice.
%1 = tensor.extract_slice %0[0, 0, 0][1, 16, 4][1, 1, 1] :
tensor<8x16x4xf32> to tensor<16x4xf32>
%3 = tensor.extract_slice %2[3, 4, 2][1, 6, 3][1, 1, 1] :
tensor<8x16x4xf32> to tensor<6x3xf32>
```
}];
let arguments = (ins
AnyRankedTensor:$source,
Variadic<Index>:$offsets,
Variadic<Index>:$sizes,
Variadic<Index>:$strides,
I64ArrayAttr:$static_offsets,
I64ArrayAttr:$static_sizes,
I64ArrayAttr:$static_strides
);
let results = (outs AnyRankedTensor:$result);
let assemblyFormat = [{
$source ``
custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
attr-dict `:` type($source) `to` type($result)
}];
let builders = [
// Build an ExtractSliceOp with mixed static and dynamic entries and
// inferred result type.
OpBuilder<(ins "Value":$source, "ArrayRef<OpFoldResult>":$offsets,
"ArrayRef<OpFoldResult>":$sizes, "ArrayRef<OpFoldResult>":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Build an ExtractSliceOp with mixed static and dynamic entries and custom
// result type. If the type passed is nullptr, it is inferred.
OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source,
"ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
"ArrayRef<OpFoldResult>":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Build an ExtractSliceOp with dynamic entries and custom result type. If
// the type passed is nullptr, it is inferred.
OpBuilder<(ins "Value":$source, "ValueRange":$offsets,
"ValueRange":$sizes, "ValueRange":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Build an ExtractSliceOp with dynamic entries and inferred result type.
OpBuilder<(ins "RankedTensorType":$resultType, "Value":$source,
"ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
];
let extraClassDeclaration = extraBaseClassDeclaration # [{
/// Returns the type of the base tensor operand.
RankedTensorType getSourceType() {
return source().getType().cast<RankedTensorType>();
}
/// The result of an extract_slice is always a tensor.
RankedTensorType getType() {
return getResult().getType().cast<RankedTensorType>();
}
/// An extract_slice result type can be fully inferred from the source type
/// and the static representation of offsets, sizes and strides. Special
/// sentinels encode the dynamic case.
static Type inferResultType(RankedTensorType sourceRankedTensorType,
ArrayRef<int64_t> staticOffsets,
ArrayRef<int64_t> staticSizes,
ArrayRef<int64_t> staticStrides);
static Type inferResultType(RankedTensorType sourceRankedTensorType,
ArrayRef<OpFoldResult> staticOffsets,
ArrayRef<OpFoldResult> staticSizes,
ArrayRef<OpFoldResult> staticStrides);
static Type inferRankReducedResultType(unsigned resultRank,
RankedTensorType sourceRankedTensorType,
ArrayRef<int64_t> staticOffsets,
ArrayRef<int64_t> staticSizes,
ArrayRef<int64_t> staticStrides);
static Type inferRankReducedResultType(unsigned resultRank,
RankedTensorType sourceRankedTensorType,
ArrayRef<OpFoldResult> staticOffsets,
ArrayRef<OpFoldResult> staticSizes,
ArrayRef<OpFoldResult> staticStrides);
/// Return the expected rank of each of the `static_offsets`, `static_sizes`
/// and `static_strides` attributes.
std::array<unsigned, 3> getArrayAttrMaxRanks() {
unsigned rank = getSourceType().getRank();
return {rank, rank, rank};
}
/// Return the number of leading operands before the `offsets`, `sizes` and
/// `strides` operands.
static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
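
For reference, a small sketch of the mixed static/dynamic form described above (SSA values and shapes are illustrative); dynamic entries show up as `?` in the result type:

```
%1 = tensor.extract_slice %0[%off, 0][%sz, 16][1, 1]
    : tensor<?x16xf32> to tensor<?x16xf32>
```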
//===----------------------------------------------------------------------===//
// FromElementsOp
//===----------------------------------------------------------------------===//
@ -200,7 +339,7 @@ def Tensor_InsertOp : Tensor_Op<"insert",
The `tensor.insert` op writes a tensor into a tensor `dest` as specified by
the operation's indices.
It returns a copy of `dest` with the proper subtensor updated with the value
It returns a copy of `dest` with the proper slice updated with the value
of `scalar`.
The arity of indices must match the rank of the tensor `dest` (i.e., if a
@ -234,6 +373,107 @@ def Tensor_InsertOp : Tensor_Op<"insert",
let hasFolder = 1;
}
//===----------------------------------------------------------------------===//
// InsertSliceOp
//===----------------------------------------------------------------------===//
def Tensor_InsertSliceOp : BaseOpWithOffsetSizesAndStrides<
Tensor_Dialect, "insert_slice",
[NoSideEffect, AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface,
TypesMatchWith<"expected result type to match dest type",
"dest", "result", "$_self">]> {
let summary = "insert_slice operation";
let description = [{
The "insert_slice" operation insert a tensor `source` into another
tensor `dest` as specified by the operation's offsets, sizes and strides
arguments.
It returns a copy of `dest` with the proper slice updated with the value
of `source`.
The insert_slice operation supports the following arguments:
* source: the tensor that is inserted.
* dest: the tensor into which the source tensor is inserted.
* offsets: tensor-rank number of offsets into the `dest` tensor into which
the slice is inserted.
* sizes: tensor-rank number of sizes which specify the sizes of the result
tensor type.
* strides: tensor-rank number of strides that specify subsampling in each
dimension.
The representation based on offsets, sizes and strides supports a
partially-static specification via attributes specified through the
`static_offsets`, `static_sizes` and `static_strides` arguments. The special
sentinel values ShapedType::kDynamicSize and
ShapedType::kDynamicStrideOrOffset encode that the corresponding entry has
a dynamic value.
After buffer-allocation, the "insert_slice" op is expected to become an
in-place buffer update.
}];
let arguments = (ins
AnyRankedTensor:$source,
AnyRankedTensor:$dest,
Variadic<Index>:$offsets,
Variadic<Index>:$sizes,
Variadic<Index>:$strides,
I64ArrayAttr:$static_offsets,
I64ArrayAttr:$static_sizes,
I64ArrayAttr:$static_strides
);
let results = (outs AnyRankedTensor:$result);
let assemblyFormat = [{
$source `into` $dest ``
custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
attr-dict `:` type($source) `into` type($dest)
}];
let verifier = ?;
let builders = [
// Build an InsertSliceOp with mixed static and dynamic entries.
OpBuilder<(ins "Value":$source, "Value":$dest,
"ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
"ArrayRef<OpFoldResult>":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
// Build an InsertSliceOp with dynamic entries.
OpBuilder<(ins "Value":$source, "Value":$dest,
"ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
];
let extraClassDeclaration = extraBaseClassDeclaration # [{
/// Returns the type of the base tensor operand.
RankedTensorType getSourceType() {
return source().getType().cast<RankedTensorType>();
}
/// The result of an insert_slice is always a tensor.
RankedTensorType getType() {
return getResult().getType().cast<RankedTensorType>();
}
/// Return the expected rank of each of the `static_offsets`, `static_sizes`
/// and `static_strides` attributes.
std::array<unsigned, 3> getArrayAttrMaxRanks() {
unsigned rank = getType().getRank();
return {rank, rank, rank};
}
/// Return the number of leading operands before the `offsets`, `sizes` and
/// `strides` operands.
static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 2; }
}];
let hasCanonicalizer = 1;
let hasFolder = 1;
}
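
Since the new op definition carries no inline example, here is a hedged usage sketch (shapes are illustrative):

```
// Insert a statically shaped slice; the result has the type of the destination.
%r = tensor.insert_slice %src into %dst[0, 8][4, 16][1, 1]
    : tensor<4x16xf32> into tensor<24x64xf32>
```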
//===----------------------------------------------------------------------===//
// ReshapeOp
//===----------------------------------------------------------------------===//


@ -1384,7 +1384,7 @@ def Tosa_GatherOp : Tosa_Op<"gather", [NoSideEffect]> {
let summary = "Gather operation,";
let description = [{
Generate a tensor for which each element in the output is a subtensor of the
Generate a tensor for which each element in the output is a slice of the
values tensor based on the value of indices.
}];


@ -627,10 +627,11 @@ LogicalResult SplitAtOpConversion::matchAndRewrite(
Value index = b.create<SelectOp>(indexIsNegative, add, originalIndex);
Value one = b.create<ConstantIndexOp>(1);
Value head = b.create<SubTensorOp>(transformed.operand(), zero, index, one);
Value head =
b.create<tensor::ExtractSliceOp>(transformed.operand(), zero, index, one);
Value tailSize = b.create<SubIOp>(rank, index);
Value tail =
b.create<SubTensorOp>(transformed.operand(), index, tailSize, one);
Value tail = b.create<tensor::ExtractSliceOp>(transformed.operand(), index,
tailSize, one);
rewriter.replaceOp(op, {head, tail});
return success();
}


@ -1741,8 +1741,8 @@ struct ConcatConverter : public OpConversionPattern<tosa::ConcatOp> {
for (auto arg : args) {
sizes[axis] = rewriter.create<memref::DimOp>(loc, arg, axisValue);
result = rewriter.create<SubTensorInsertOp>(loc, arg, result, offsets,
sizes, strides);
result = rewriter.create<tensor::InsertSliceOp>(loc, arg, result, offsets,
sizes, strides);
offsets[axis] = rewriter.create<AddIOp>(loc, offsets[axis], sizes[axis]);
}
rewriter.replaceOp(op, result);


@ -12,6 +12,7 @@
#include "mlir/Conversion/TosaToStandard/TosaToStandard.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@ -42,7 +43,7 @@ public:
SmallVector<int64_t> strides;
strides.resize(sliceOp.getType().template cast<ShapedType>().getRank(), 1);
rewriter.replaceOpWithNewOp<SubTensorOp>(
rewriter.replaceOpWithNewOp<tensor::ExtractSliceOp>(
sliceOp, sliceOp.getType(), input, ValueRange({}), ValueRange({}),
ValueRange({}), sliceOp.start(), sliceOp.size(),
rewriter.getI64ArrayAttr(strides));


@ -35,6 +35,7 @@ public:
target.addIllegalOp<tosa::SliceOp>();
target.addIllegalOp<tosa::ApplyScaleOp>();
target.addLegalDialect<StandardOpsDialect>();
target.addLegalDialect<tensor::TensorDialect>();
mlir::tosa::populateTosaToStandardConversionPatterns(&patterns);
if (failed(applyPartialConversion(getOperation(), target,


@ -16,6 +16,7 @@
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
@ -746,22 +747,23 @@ struct ReplaceStaticShapeDims : OpRewritePattern<InitTensorOp> {
namespace {
/// Since the `init_tensor` operation creates a tensor needed only for its shape, a
/// subtensor of this is also needed only for its shape. The result can be
/// replaced by a new init_tensor operation of the same size as the subtensor
/// op.
struct FoldInitTensorWithSubTensorOp : public OpRewritePattern<SubTensorOp> {
using OpRewritePattern<SubTensorOp>::OpRewritePattern;
/// slice of this is also needed only for its shape. The result can be
/// replaced by a new init_tensor operation of the same size as the extract
/// slice op.
struct FoldInitTensorWithExtractSliceOp
: public OpRewritePattern<tensor::ExtractSliceOp> {
using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SubTensorOp subtensorOp,
LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
PatternRewriter &rewriter) const override {
if (!subtensorOp.source().getDefiningOp<linalg::InitTensorOp>())
if (!sliceOp.source().getDefiningOp<linalg::InitTensorOp>())
return failure();
rewriter.replaceOpWithNewOp<linalg::InitTensorOp>(
subtensorOp, subtensorOp.sizes(),
sliceOp, sliceOp.sizes(),
llvm::to_vector<4>(llvm::map_range(
subtensorOp.static_sizes(),
sliceOp.static_sizes(),
[](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); })),
subtensorOp.getSourceType().getElementType());
sliceOp.getSourceType().getElementType());
return success();
}
};
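
A before/after sketch of this canonicalization (shapes are illustrative): the slice of an `init_tensor` is itself only needed for its shape.

```
// Before:
%0 = linalg.init_tensor [16, 32] : tensor<16x32xf32>
%1 = tensor.extract_slice %0[0, 0][8, 16][1, 1] : tensor<16x32xf32> to tensor<8x16xf32>

// After:
%1 = linalg.init_tensor [8, 16] : tensor<8x16xf32>
```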
@ -797,7 +799,7 @@ struct FoldInitTensorWithTensorReshapeOp
void InitTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
results.add<FoldInitTensorWithSubTensorOp,
results.add<FoldInitTensorWithExtractSliceOp,
FoldInitTensorWithTensorReshapeOp<TensorExpandShapeOp>,
FoldInitTensorWithTensorReshapeOp<TensorCollapseShapeOp>,
ReplaceStaticShapeDims>(context);


@ -15,6 +15,7 @@
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/IR/Operation.h"
@ -232,8 +233,8 @@ public:
}
};
/// Convert `subtensor %t [offsets][sizes][strides] -> %st` to an alloc + copy
/// pattern.
/// Convert `extract_slice %t [offsets][sizes][strides] -> %st` to an
/// alloc + copy pattern.
/// ```
/// %a = alloc(sizes)
/// %sv = subview %source [offsets][sizes][strides]
@ -242,21 +243,22 @@ public:
///
/// This pattern is arguably a std pattern once linalg::CopyOp becomes
/// std::CopyOp.
class SubTensorOpConverter : public OpConversionPattern<SubTensorOp> {
class ExtractSliceOpConverter
: public OpConversionPattern<tensor::ExtractSliceOp> {
public:
using OpConversionPattern<SubTensorOp>::OpConversionPattern;
using OpConversionPattern<tensor::ExtractSliceOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(SubTensorOp op, ArrayRef<Value> operands,
matchAndRewrite(tensor::ExtractSliceOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const final {
SubTensorOpAdaptor adaptor(operands, op->getAttrDictionary());
tensor::ExtractSliceOpAdaptor adaptor(operands, op->getAttrDictionary());
Value sourceMemref = adaptor.source();
assert(sourceMemref.getType().isa<MemRefType>());
MemRefType subviewMemRefType =
getTypeConverter()->convertType(op.getType()).cast<MemRefType>();
// op.sizes() capture exactly the dynamic alloc operands matching the
// subviewMemRefType thanks to subview/subtensor canonicalization and
// subviewMemRefType thanks to subview/slice canonicalization and
// verification.
Value alloc = rewriter.create<memref::AllocOp>(
op.getLoc(), subviewMemRefType, op.sizes());
@ -269,7 +271,7 @@ public:
}
};
/// Convert `subtensor_insert %source into %dest [offsets][sizes][strides] ->
/// Convert `insert_slice %source into %dest [offsets][sizes][strides] ->
/// %t` to a buffer_cast + subview + copy + tensor_load pattern.
/// buffer_cast and tensor_load are inserted automatically by the
/// conversion infra:
@ -281,15 +283,15 @@ public:
///
/// This pattern is arguably a std pattern once linalg::CopyOp becomes
/// std::CopyOp.
class SubTensorInsertOpConverter
: public OpConversionPattern<SubTensorInsertOp> {
class InsertSliceOpConverter
: public OpConversionPattern<tensor::InsertSliceOp> {
public:
using OpConversionPattern<SubTensorInsertOp>::OpConversionPattern;
using OpConversionPattern<tensor::InsertSliceOp>::OpConversionPattern;
LogicalResult
matchAndRewrite(SubTensorInsertOp op, ArrayRef<Value> operands,
matchAndRewrite(tensor::InsertSliceOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const final {
SubTensorInsertOpAdaptor adaptor(operands, op->getAttrDictionary());
tensor::InsertSliceOpAdaptor adaptor(operands, op->getAttrDictionary());
Value sourceMemRef = adaptor.source();
assert(sourceMemRef.getType().isa<MemRefType>());
@ -323,7 +325,8 @@ struct LinalgBufferizePass : public LinalgBufferizeBase<LinalgBufferizePass> {
// Mark all Standard operations legal.
target.addLegalDialect<AffineDialect, math::MathDialect,
memref::MemRefDialect, StandardOpsDialect>();
target.addIllegalOp<InitTensorOp, SubTensorOp, SubTensorInsertOp>();
target.addIllegalOp<InitTensorOp, tensor::ExtractSliceOp,
tensor::InsertSliceOp>();
// Mark all Linalg operations illegal as long as they work on tensors.
auto isLegalOperation = [&](Operation *op) {
@ -355,8 +358,8 @@ void mlir::linalg::populateLinalgBufferizePatterns(
BufferizeInitTensorOp,
BufferizeTensorReshapeOp<TensorExpandShapeOp>,
BufferizeTensorReshapeOp<TensorCollapseShapeOp>,
SubTensorOpConverter,
SubTensorInsertOpConverter
ExtractSliceOpConverter,
InsertSliceOpConverter
>(typeConverter, patterns.getContext());
// clang-format on
}


@ -77,7 +77,7 @@
// out of the function at each call site.
//
// iii. as an optimization over ii., it may be possible to reuse an argument
// and only want to return a subtensor.
// and only want to return a slice.
// This may forego allocation by letting *all* callers decide whether to
// pass a new *aliasing* memref function argument (i.e. a subview).
// Without loss of generality, callers may agree to allocate a new buffer
@ -284,7 +284,7 @@ LLVM_ATTRIBUTE_UNUSED static InPlaceSpec getInPlace(Value v) {
// 5. Whether an op bufferizes to a memory read.
// 6. Whether an op bufferizes to a memory write.
// These interfaces are necessary to distinguish between various cases and allow
// special inplace behavior for (SubTensorOp, SubTensorInsertOp) pairs.
// special inplace behavior for (ExtractSliceOp, InsertSliceOp) pairs.
//===----------------------------------------------------------------------===//
/// Return `true` if the op is explicitly supported by bufferization or if it
@ -295,8 +295,8 @@ static bool hasKnownBufferizationAliasingBehavior(Operation *op) {
// clang-format off
isa<LinalgOp,
ReturnOp,
SubTensorOp,
SubTensorInsertOp,
ExtractSliceOp,
InsertSliceOp,
VectorTransferOpInterface>(op)
// clang-format on
|| (none_of(op->getResultTypes(),
@ -339,8 +339,7 @@ static OpResult getInplaceableOpResult(VectorTransferOpInterface op,
/// Return the OpResult that may bufferize into the same buffer as `opOperand`
/// when the op is bufferized inplace.
/// Return null if no such result exists.
static OpResult getInplaceableOpResult(SubTensorInsertOp op,
OpOperand &opOperand) {
static OpResult getInplaceableOpResult(InsertSliceOp op, OpOperand &opOperand) {
if (opOperand.get() != op.dest())
return OpResult();
return op->getResult(0);
@ -357,12 +356,12 @@ static OpResult getInplaceableOpResult(OpOperand &opOperand) {
// Ops that perform destructive updates on operand(s) to produce
// result(s).
.Case<LinalgOp,
SubTensorInsertOp,
InsertSliceOp,
VectorTransferOpInterface>(
[&](auto op) { return getInplaceableOpResult(op, opOperand); })
// SubTensorOp is special, when bufferized inplace it just returns an
// ExtractSliceOp is special, when bufferized inplace it just returns an
// alias to its operand. Its result is never inplaceable on its operand.
.Case([&](SubTensorOp op) { return OpResult(); })
.Case([&](ExtractSliceOp op) { return OpResult(); })
// Other ops.
.Default([&](Operation *op) { return OpResult(); });
// clang-format on
@ -380,10 +379,10 @@ static Optional<OpResult> getAliasingOpResult(OpOperand &opOperand) {
return TypeSwitch<Operation *, OpResult>(opOperand.getOwner())
// ReturnOp has no result.
.Case([&](ReturnOp op) { return OpResult(); })
// SubTensorOp is different: its result is not inplaceable on op.source
// ExtractSliceOp is different: its result is not inplaceable on op.source
// but when bufferized inplace, the result is an aliasing subregion of
// op.source.
.Case([&](SubTensorOp op) { return op->getResult(0); })
.Case([&](ExtractSliceOp op) { return op->getResult(0); })
.Default(
[&](Operation *op) { return getInplaceableOpResult(opOperand); });
}
@ -395,8 +394,9 @@ static bool bufferizesToMemoryRead(OpOperand &opOperand) {
// it. Conservatively return true.
if (!maybeOpResult)
return true;
// SubTensorOp alone doesn't bufferize to a memory read, one of its uses may.
if (isa<SubTensorOp>(opOperand.getOwner()))
// ExtractSliceOp alone doesn't bufferize to a memory read, one of its uses
// may.
if (isa<ExtractSliceOp>(opOperand.getOwner()))
return false;
if (auto linalgOp = dyn_cast<LinalgOp>(opOperand.getOwner()))
return linalgOp.isInputTensor(&opOperand) ||
@ -425,8 +425,9 @@ bufferizesToMemoryWrite(OpOperand &opOperand,
// A ReturnOp is not a write.
if (isa<ReturnOp>(opOperand.getOwner()))
return false;
// SubTensorOp alone doesn't bufferize to a memory write, one of its uses may.
if (maybeOpResult->getDefiningOp<SubTensorOp>())
// ExtractSliceOp alone doesn't bufferize to a memory write, one of its uses
// may.
if (maybeOpResult->getDefiningOp<ExtractSliceOp>())
return false;
// If we have a matching OpResult, this is a write.
// Additionally allow to restrict to only inPlace write, if so specified.
@ -442,10 +443,10 @@ namespace {
/// The BufferizationAliasInfo class maintains a list of buffer aliases and
/// equivalence classes to support bufferization.
/// SubTensorOps have special behavior, they act as a level of indirection for
/// bufferization. They don't create reads or writes themselves and analysis
/// ExtractSliceOps have special behavior, they act as a level of indirection
/// for bufferization. They don't create reads or writes themselves and analysis
/// needs to look through their uses.
/// SubTensorOp + SubTensorInsertOp have special joint behavior: they may
/// ExtractSliceOp + InsertSliceOp have special joint behavior: they may
/// bufferize to the same buffer (i.e. subview), which is what introduces the
/// need for bufferization classes.
/// Some of these functionalities could be refactored in a Bufferizer class that
@ -469,7 +470,7 @@ public:
/// Return true if the buffer to which `operand` would bufferize is equivalent
/// to some use that would bufferize to a write to a buffer.
bool aliasesInPlaceWrite(SubTensorOp subTensorOp) const;
bool aliasesInPlaceWrite(ExtractSliceOp extractSliceOp) const;
/// Merge result's and operand's aliasing sets and iterate to a fixed point.
void bufferizeInPlace(OpResult result, OpOperand &operand,
@ -495,10 +496,10 @@ public:
bool existsNonDominatingRead(OpOperand &opOperand,
const DominanceInfo &domInfo) const;
/// Return true if the source of a `subTensorInsertOp` bufferizes to an
/// equivalent SubTensorOp.
bool isSourceEquivalentToAMatchingSubTensorOp(
SubTensorInsertOp subTensorInsertOp) const;
/// Return true if the source of a `insertSliceOp` bufferizes to an
/// equivalent ExtractSliceOp.
bool isSourceEquivalentToAMatchingExtractSliceOp(
InsertSliceOp insertSliceOp) const;
/// Print to `os`.
void print(raw_ostream &os) const;
@ -519,13 +520,13 @@ private:
/// Iteratively merge alias sets until a fixed-point.
void mergeAliasesToFixedPoint();
/// Return true if the (SubTensorOp, SubTensorInsertOp) pair match (i.e.
/// Return true if the (ExtractSliceOp, InsertSliceOp) pair match (i.e.
/// equivalent operand / result and same offset/sizes/strides specification).
///
/// This is one particular type of relationship between ops on tensors that
/// reduce to an equivalence on buffers. This should be generalized and
/// exposed as interfaces on the proper types.
bool areEquivalentSubTensorOps(SubTensorOp st, SubTensorInsertOp sti) const;
bool areEquivalentExtractSliceOps(ExtractSliceOp st, InsertSliceOp sti) const;
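
A sketch of such a matching pair (values are illustrative): identical offsets, sizes and strides, with the `insert_slice` destination equivalent to the `extract_slice` source.

```
%st = tensor.extract_slice %t[%i, 0][4, 64][1, 1]
    : tensor<24x64xi8> to tensor<4x64xi8>
...
%r = tensor.insert_slice %w into %t[%i, 0][4, 64][1, 1]
    : tensor<4x64xi8> into tensor<24x64xi8>
```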
/// Return true if there is a `candidateOp` that would write to memory after
/// bufferization and such that:
@ -658,10 +659,10 @@ bool BufferizationAliasInfo::aliasesNonWriteableBuffer(
/// Return true if the buffer to which `operand` would bufferize is equivalent
/// to some use that would bufferize to a write to a buffer.
bool BufferizationAliasInfo::aliasesInPlaceWrite(
SubTensorOp subTensorOp) const {
ExtractSliceOp extractSliceOp) const {
LDBG("----Start aliasesInPlaceWrite\n");
LDBG("-------for op: " << *subTensorOp.getOperation() << '\n');
for (Value v : getAliasInfoRef(subTensorOp.result())) {
LDBG("-------for op: " << *extractSliceOp.getOperation() << '\n');
for (Value v : getAliasInfoRef(extractSliceOp.result())) {
for (auto &use : v.getUses()) {
if (bufferizesToMemoryWrite(use, InPlaceSpec::True)) {
LDBG("-----------wants to bufferize to inPlace write: "
@ -670,7 +671,7 @@ bool BufferizationAliasInfo::aliasesInPlaceWrite(
}
}
}
LDBG("----------->subtensor does not alias an inplace write");
LDBG("----------->extract_slice does not alias an inplace write");
return false;
}
@ -796,16 +797,16 @@ bool BufferizationAliasInfo::existsNonDominatingRead(
return false;
}
/// Return true if the source of a `subTensorInsertOp` bufferizes to an
/// equivalent SubTensorOp.
bool BufferizationAliasInfo::isSourceEquivalentToAMatchingSubTensorOp(
SubTensorInsertOp subTensorInsertOp) const {
auto leaderIt = equivalentInfo.findLeader(subTensorInsertOp.source());
/// Return true if the source of a `insertSliceOp` bufferizes to an
/// equivalent ExtractSliceOp.
bool BufferizationAliasInfo::isSourceEquivalentToAMatchingExtractSliceOp(
InsertSliceOp insertSliceOp) const {
auto leaderIt = equivalentInfo.findLeader(insertSliceOp.source());
for (auto mit = leaderIt, meit = equivalentInfo.member_end(); mit != meit;
++mit) {
if (areEquivalentSubTensorOps(
dyn_cast_or_null<SubTensorOp>(mit->v.getDefiningOp()),
subTensorInsertOp))
if (areEquivalentExtractSliceOps(
dyn_cast_or_null<ExtractSliceOp>(mit->v.getDefiningOp()),
insertSliceOp))
return true;
}
return false;
@ -874,8 +875,8 @@ void BufferizationAliasInfo::mergeAliasesToFixedPoint() {
/// This is one particular type of relationship between ops on tensors that
/// reduce to an equivalence on buffers. This should be generalized and exposed
/// as interfaces on the proper types.
bool BufferizationAliasInfo::areEquivalentSubTensorOps(
SubTensorOp st, SubTensorInsertOp sti) const {
bool BufferizationAliasInfo::areEquivalentExtractSliceOps(
ExtractSliceOp st, InsertSliceOp sti) const {
if (!st || !sti)
return false;
if (!equivalentInfo.isEquivalent(st.source(), sti.dest()))
@ -950,47 +951,47 @@ bool BufferizationAliasInfo::isClobberedWriteBeforeRead(
return false;
}
// The case `opToBufferize` isa SubTensorOp is important enough that we look
// for it specifically. The key information to discover is whether the
// aliasing read or write come from a matching SubTensorInsertOp.
// The case `opToBufferize` isa ExtractSliceOp is important enough that we
// look for it specifically. The key information to discover is whether the
// aliasing read or write come from a matching InsertSliceOp.
// Such a pattern is introduced by tiling and is the key inplace condition
// not to miss.
if (auto subTensorOp = dyn_cast<SubTensorOp>(opToBufferize)) {
if (auto subTensorInsertOp = dyn_cast<SubTensorInsertOp>(aliasingReadOp)) {
// %1 = subtensor %0[%offset_sizes_and_strides_1]
if (auto extractSliceOp = dyn_cast<ExtractSliceOp>(opToBufferize)) {
if (auto insertSliceOp = dyn_cast<InsertSliceOp>(aliasingReadOp)) {
// %1 = extract_slice %0[%offset_sizes_and_strides_1]
//
// ... // 0 or more of inplace compute that reduces to: %X is an
// // aliasingWrite equivalent to %1.
// %W = inplace_write(%1)
//
// // aliasingRead %Y in subtensor_insert
// ... = subtensor_insert %W into %R[%offset_sizes_and_strides_1]
if (aliasingRead.get() == subTensorInsertOp.dest() &&
// // aliasingRead %Y in insert_slice
// ... = insert_slice %W into %R[%offset_sizes_and_strides_1]
if (aliasingRead.get() == insertSliceOp.dest() &&
// TODO: This is currently too restrictive and misses clobberings.
// When available, use container-containee analysis: the condition
// should be that the `aliasingWrite` is contained within
// `subTensorInsertOp.source()`.
// `insertSliceOp.source()`.
equivalentInfo.isEquivalent(aliasingWrite.get(),
subTensorInsertOp.source()) &&
areEquivalentSubTensorOps(subTensorOp, subTensorInsertOp)) {
LDBG("---->clobbering matching subtensor/subtensor_insert\n");
insertSliceOp.source()) &&
areEquivalentExtractSliceOps(extractSliceOp, insertSliceOp)) {
LDBG("---->clobbering matching extract_slice/insert_slice\n");
return true;
}
// %1 = subtensor %0[%offset_sizes_and_strides_1]
// %1 = extract_slice %0[%offset_sizes_and_strides_1]
//
// ... // bunch of inplace ops that reduce to %X, equivalent to %1.
// %X = inplace_write(%1)
//
// // aliasingRead %X in subtensor_insert
// // aliasingWrite %Y in subtensor_insert
// ... = subtensor_insert %X into %Y[%offset_sizes_and_strides_1]
// // aliasingRead %X in insert_slice
// // aliasingWrite %Y in insert_slice
// ... = insert_slice %X into %Y[%offset_sizes_and_strides_1]
if (aliasingReadOp == aliasingWriteOp) {
assert(aliasingRead.get() == subTensorInsertOp.source() &&
"expected read to source of subtensor_insert");
assert(aliasingWrite.get() == subTensorInsertOp.dest() &&
"expected write to dest of subtensor_insert");
if (areEquivalentSubTensorOps(subTensorOp, subTensorInsertOp)) {
LDBG("---->clobbering matching subtensor/subtensor_insert\n");
assert(aliasingRead.get() == insertSliceOp.source() &&
"expected read to source of insert_slice");
assert(aliasingWrite.get() == insertSliceOp.dest() &&
"expected write to dest of insert_slice");
if (areEquivalentExtractSliceOps(extractSliceOp, insertSliceOp)) {
LDBG("---->clobbering matching extract_slice/insert_slice\n");
return true;
}
}
@ -1262,114 +1263,114 @@ static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp,
return success();
}
/// Bufferize SubTensorOp to subview with optional alloc + copy depending on
/// Bufferize ExtractSliceOp to subview with optional alloc + copy depending on
/// whether or not it is marked inplaceable.
/// Note that `getInplaceableOpResult` on a SubTensorOp always returns null.
/// As consequence a SubTensorOp always alloc + copy when taken in
/// Note that `getInplaceableOpResult` on an ExtractSliceOp always returns null.
/// As a consequence, an ExtractSliceOp always bufferizes to alloc + copy when
/// taken in isolation.
static LogicalResult bufferize(OpBuilder &b, SubTensorOp subTensorOp,
static LogicalResult bufferize(OpBuilder &b, ExtractSliceOp extractSliceOp,
BlockAndValueMapping &bvm,
const BufferizationAliasInfo &aliasInfo) {
LDBG("bufferize: " << *subTensorOp << '\n');
LDBG("bufferize: " << *extractSliceOp << '\n');
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(subTensorOp);
b.setInsertionPoint(extractSliceOp);
Location loc = subTensorOp.getLoc();
Location loc = extractSliceOp.getLoc();
// Bail if source was not bufferized.
Value srcMemref = lookup(bvm, subTensorOp.source());
Value srcMemref = lookup(bvm, extractSliceOp.source());
if (!srcMemref)
return failure();
auto srcMemrefType = srcMemref.getType().cast<MemRefType>();
auto dstTensorType = subTensorOp.result().getType().cast<RankedTensorType>();
auto dstTensorType =
extractSliceOp.result().getType().cast<RankedTensorType>();
// If not inplaceable, alloc.
Value alloc;
auto inPlace = getInPlace(subTensorOp->getResult(0));
auto inPlace = getInPlace(extractSliceOp->getResult(0));
if (inPlace != InPlaceSpec::True) {
alloc =
createNewAllocDeallocPairForShapedValue(b, loc, subTensorOp.result());
alloc = createNewAllocDeallocPairForShapedValue(b, loc,
extractSliceOp.result());
b.setInsertionPointAfter(alloc.getDefiningOp());
}
// Bufferize to subview.
auto subviewMemRefType =
memref::SubViewOp::inferRankReducedResultType(
dstTensorType.getRank(), srcMemrefType, subTensorOp.getMixedOffsets(),
subTensorOp.getMixedSizes(), subTensorOp.getMixedStrides())
dstTensorType.getRank(), srcMemrefType,
extractSliceOp.getMixedOffsets(), extractSliceOp.getMixedSizes(),
extractSliceOp.getMixedStrides())
.cast<MemRefType>();
Value subView = b.create<memref::SubViewOp>(
loc, subviewMemRefType, srcMemref, subTensorOp.getMixedOffsets(),
subTensorOp.getMixedSizes(), subTensorOp.getMixedStrides());
loc, subviewMemRefType, srcMemref, extractSliceOp.getMixedOffsets(),
extractSliceOp.getMixedSizes(), extractSliceOp.getMixedStrides());
/// If not inplaceable, copy.
if (alloc) {
b.create<CopyOp>(subTensorOp.getLoc(), subView, alloc);
b.create<CopyOp>(extractSliceOp.getLoc(), subView, alloc);
subView = alloc;
}
map(bvm, subTensorOp.result(), subView);
map(bvm, extractSliceOp.result(), subView);
return success();
}
static LogicalResult bufferize(OpBuilder &b,
SubTensorInsertOp subTensorInsertOp,
static LogicalResult bufferize(OpBuilder &b, InsertSliceOp insertSliceOp,
BlockAndValueMapping &bvm,
const BufferizationAliasInfo &aliasInfo) {
LDBG("bufferize: " << *subTensorInsertOp << '\n');
LDBG("bufferize: " << *insertSliceOp << '\n');
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(subTensorInsertOp);
Location loc = subTensorInsertOp.getLoc();
b.setInsertionPoint(insertSliceOp);
Location loc = insertSliceOp.getLoc();
Value dstMemref = lookup(bvm, subTensorInsertOp.dest());
Value dstMemref = lookup(bvm, insertSliceOp.dest());
if (!dstMemref)
return failure();
auto inPlace = getInPlace(subTensorInsertOp->getResult(0));
auto inPlace = getInPlace(insertSliceOp->getResult(0));
if (inPlace != InPlaceSpec::True) {
// Since subtensor_insert arise from tiling and introducing loops, this
// Since insert_slice ops arise from tiling and introducing loops, this
// case is generally a deal breaker. When used with loops, this ends up
// cloning the whole tensor on every single iteration and is a symptom
// of a catastrophically bad scheduling decision.
// TODO: be very loud about it or even consider failing the pass.
Value newDstMemref = createNewAllocDeallocPairForShapedValue(
b, loc, subTensorInsertOp.result());
Value newDstMemref =
createNewAllocDeallocPairForShapedValue(b, loc, insertSliceOp.result());
b.setInsertionPointAfter(newDstMemref.getDefiningOp());
b.create<CopyOp>(subTensorInsertOp.getLoc(), dstMemref, newDstMemref);
b.create<CopyOp>(insertSliceOp.getLoc(), dstMemref, newDstMemref);
dstMemref = newDstMemref;
}
auto dstMemrefType = dstMemref.getType().cast<MemRefType>();
Value srcMemref = lookup(bvm, subTensorInsertOp.source());
Value srcMemref = lookup(bvm, insertSliceOp.source());
if (!srcMemref)
return failure();
auto subviewMemRefType =
memref::SubViewOp::inferRankReducedResultType(
subTensorInsertOp.getSourceType().getRank(), dstMemrefType,
subTensorInsertOp.getMixedOffsets(),
subTensorInsertOp.getMixedSizes(),
subTensorInsertOp.getMixedStrides())
insertSliceOp.getSourceType().getRank(), dstMemrefType,
insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
insertSliceOp.getMixedStrides())
.cast<MemRefType>();
// A copy of the source buffer is needed if either:
// - The producer of `source` is not inplace. This is the case where a
// subtensor is computed out of place into the inplace full tensor.
// slice is computed out of place into the inplace full tensor.
// - The result is not inplace. This is the case where the whole tensor is
// cloned and the clone needs to be updated.
if (!aliasInfo.isSourceEquivalentToAMatchingSubTensorOp(subTensorInsertOp) ||
if (!aliasInfo.isSourceEquivalentToAMatchingExtractSliceOp(insertSliceOp) ||
inPlace != InPlaceSpec::True) {
LDBG("subtensor_insert needs extra source copy: "
<< subTensorInsertOp.source() << " -> copy\n");
LDBG("insert_slice needs extra source copy: " << insertSliceOp.source()
<< " -> copy\n");
// Take a subview of the dst.
Value subView = b.create<memref::SubViewOp>(
loc, subviewMemRefType, dstMemref, subTensorInsertOp.getMixedOffsets(),
subTensorInsertOp.getMixedSizes(), subTensorInsertOp.getMixedStrides());
b.create<CopyOp>(subTensorInsertOp.getLoc(), srcMemref, subView);
loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(),
insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides());
b.create<CopyOp>(insertSliceOp.getLoc(), srcMemref, subView);
}
map(bvm, subTensorInsertOp.result(), dstMemref);
map(bvm, insertSliceOp.result(), dstMemref);
return success();
}
@ -1433,54 +1434,54 @@ static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op,
//===----------------------------------------------------------------------===//
///
/// Rationale for bufferizing `%1 = subtensor %0[...]` inplace.
/// Rationale for bufferizing `%1 = tensor.extract_slice %0[...]` inplace.
/// ======================================================================
///
/// When bufferized out of place, a SubTensorOp lowers to alloc + copy. This
/// When bufferized out of place, an ExtractSliceOp lowers to alloc + copy. This
/// cannot change the flow of information for either the source or the
/// result buffers.
///
/// When bufferized inplace, a SubTensorOp does not by itself create any read or
/// write from memory. Instead, it has the effect of merging the alias sets of
/// the source and the result buffers.
/// When bufferized inplace, an ExtractSliceOp does not by itself create any read
/// or write from memory. Instead, it has the effect of merging the alias sets
/// of the source and the result buffers.
///
/// An analysis is required to ensure inplace bufferization would not result in
/// RaW dependence violations.
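///
/// As a minimal sketch (the value names and shapes below are illustrative
/// only, not taken from an actual test):
///
/// ```
///   %1 = tensor.extract_slice %0[0, 0] [4, 8] [1, 1]
///       : tensor<16x8xf32> to tensor<4x8xf32>
/// ```
///
/// Bufferizing %1 inplace turns its buffer into a view into the buffer of %0,
/// so any later inplace write through %1 is also a write into the alias set of
/// %0; the analysis below checks that this cannot clobber a value that is
/// still read afterwards.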
static void bufferizableInPlaceAnalysis(SubTensorOp subTensorOp,
static void bufferizableInPlaceAnalysis(ExtractSliceOp extractSliceOp,
BufferizationAliasInfo &aliasInfo,
const DominanceInfo &domInfo) {
LDBG('\n');
LDBG("Try to bufferize subtensor inplace: " << *subTensorOp << '\n');
LDBG("Try to bufferize extract_slice inplace: " << *extractSliceOp << '\n');
// If `subTensorOp` were to be bufferized inplace, it cannot end up
// If `extractSliceOp` were to be bufferized inplace, it cannot end up
// aliasing a write into a non-writeable buffer.
bool wouldCreateAliasingWriteToNonWriteableBuffer =
aliasInfo.aliasesInPlaceWrite(subTensorOp) &&
aliasInfo.aliasesNonWriteableBuffer(subTensorOp->getOpOperand(0));
aliasInfo.aliasesInPlaceWrite(extractSliceOp) &&
aliasInfo.aliasesNonWriteableBuffer(extractSliceOp->getOpOperand(0));
if (wouldCreateAliasingWriteToNonWriteableBuffer)
LDBG("->the corresponding buffer is not writeable\n");
LDBG("->bufferizes to writeable inplace buffer\n");
// In any of subTensorOp.result's aliases, can we find 2 such that we hit
// In any of extractSliceOp.result's aliases, can we find 2 such that we hit
// an interfering write?
Value s = subTensorOp.source(), r = subTensorOp.result();
Value s = extractSliceOp.source(), r = extractSliceOp.result();
bool foundInterference = wouldCreateAliasingWriteToNonWriteableBuffer ||
// Do not consider (s, s) and (r, r) as all the
// aliasings already exist by construction; we are
// interested in new interfering aliases only.
aliasInfo.wouldCreateReadAfterWriteInterference(
s, r, subTensorOp, domInfo) ||
s, r, extractSliceOp, domInfo) ||
aliasInfo.wouldCreateReadAfterWriteInterference(
r, s, subTensorOp, domInfo);
r, s, extractSliceOp, domInfo);
if (foundInterference) {
setInPlaceOpResult(subTensorOp->getResult(0), InPlaceSpec::False);
setInPlaceOpResult(extractSliceOp->getResult(0), InPlaceSpec::False);
} else {
setInPlaceOpResult(subTensorOp->getResult(0), InPlaceSpec::True);
aliasInfo.bufferizeInPlace(subTensorOp->getResult(0),
subTensorOp->getOpOperand(0));
setInPlaceOpResult(extractSliceOp->getResult(0), InPlaceSpec::True);
aliasInfo.bufferizeInPlace(extractSliceOp->getResult(0),
extractSliceOp->getOpOperand(0));
}
LDBG("Done bufferizing subtensor\n");
LDBG("Done bufferizing extract_slice\n");
}
/// Analyze the (opOperand, result) pair to determine whether the result can
@ -1490,8 +1491,8 @@ static void bufferizableInPlaceAnalysis(OpOperand &operand, OpResult result,
BufferizationAliasInfo &aliasInfo,
const DominanceInfo &domInfo) {
Operation *op = result.getDefiningOp();
assert(result && !isa<SubTensorOp>(op) &&
"expected OpResult not coming from a SubTensorOp");
assert(result && !isa<ExtractSliceOp>(op) &&
"expected OpResult not coming from a ExtractSliceOp");
int64_t resultNumber = result.getResultNumber();
(void)resultNumber;
@ -1541,48 +1542,47 @@ static void inPlaceAnalysisFuncOpInternals(FuncOp funcOp,
"expected a funcOp definition with a body");
// Collect ops so we can build our own traversal.
SmallVector<SubTensorOp> subTensorOps;
SmallVector<SubTensorInsertOp> subTensorInsertOps;
SmallVector<Operation *> nonSubTensorOps;
SmallVector<ExtractSliceOp> extractSliceOps;
SmallVector<InsertSliceOp> insertSliceOps;
SmallVector<Operation *> nonSliceOps;
funcOp.walk([&](Operation *op) {
if (auto subTensorOp = dyn_cast<SubTensorOp>(op))
return subTensorOps.push_back(subTensorOp);
if (auto subTensorInsertOp = dyn_cast<SubTensorInsertOp>(op))
return subTensorInsertOps.push_back(subTensorInsertOp);
if (auto extractSliceOp = dyn_cast<ExtractSliceOp>(op))
return extractSliceOps.push_back(extractSliceOp);
if (auto insertSliceOp = dyn_cast<InsertSliceOp>(op))
return insertSliceOps.push_back(insertSliceOp);
auto isaTensor = [](Type t) { return t.isa<TensorType>(); };
// No tensors => no buffers.
if (none_of(op->getOperandTypes(), isaTensor) &&
none_of(op->getResultTypes(), isaTensor))
return;
nonSubTensorOps.push_back(op);
nonSliceOps.push_back(op);
});
// Bufferize SubTensorInsertOp greedily: we almost never want to bufferize
// Bufferize InsertSliceOp greedily: we almost never want to bufferize
// the tensor "inserted into" to become out-of-place. This implementation
// does not distinguish between different SubTensorInsertOps. If we want
// finer-grained behavior, we could order the SubTensorInsertOps with some
// metric.
// Walk SubTensorInsertOps in reverse for better interference behavior.
for (SubTensorInsertOp subTensorInsertOp : reverse(subTensorInsertOps)) {
OpOperand &destOpOperand = subTensorInsertOp->getOpOperand(1);
// does not distinguish between different InsertSliceOps. If we want
// finer-grained behavior, we could order the InsertSliceOps with some metric.
// Walk InsertSliceOps in reverse for better interference behavior.
for (InsertSliceOp insertSliceOp : reverse(insertSliceOps)) {
OpOperand &destOpOperand = insertSliceOp->getOpOperand(1);
bufferizableInPlaceAnalysis(destOpOperand,
getInplaceableOpResult(destOpOperand),
aliasInfo, domInfo);
}
// Bufferize all ops except SubTensorOp and SubTensorInsertOp which are
// handled separately.
// Bufferize all ops except ExtractSliceOp and InsertSliceOp, which are handled
// separately.
// Walk other ops in reverse for better interference behavior.
for (Operation *op : reverse(nonSubTensorOps))
for (Operation *op : reverse(nonSliceOps))
for (OpOperand &opOperand : op->getOpOperands())
if (OpResult result = getInplaceableOpResult(opOperand))
bufferizableInPlaceAnalysis(opOperand, result, aliasInfo, domInfo);
// Finally, bufferize SubTensorOp.
// Walk SubTensorOps in reverse for better clobbering behavior: it is easier
// to detect clobbers of smaller subtensors before larger ones.
for (SubTensorOp subTensorOp : reverse(subTensorOps))
bufferizableInPlaceAnalysis(subTensorOp, aliasInfo, domInfo);
// Finally, bufferize ExtractSliceOp.
// Walk ExtractSliceOps in reverse for better clobbering behavior: it is
// easier to detect clobbers of smaller slices before larger ones.
for (ExtractSliceOp extractSliceOp : reverse(extractSliceOps))
bufferizableInPlaceAnalysis(extractSliceOp, aliasInfo, domInfo);
LDBG("End InPlaceAnalysisFuncOpInternals:\n" << funcOp << '\n');
}
@ -1611,8 +1611,8 @@ bufferizeFuncOpInternals(FuncOp funcOp, BlockAndValueMapping &bvm,
.Case<memref::DimOp,
LinalgOp,
ReturnOp,
SubTensorOp,
SubTensorInsertOp,
ExtractSliceOp,
InsertSliceOp,
VectorTransferOpInterface>(
[&](auto op) { return bufferize(b, op, bvm, aliasInfo); })
// clang-format on

View File

@ -18,6 +18,7 @@
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BuiltinTypes.h"
@ -457,8 +458,8 @@ struct ReplaceUnitExtents : public OpRewritePattern<GenericOp> {
};
} // namespace
/// Get the reassociation maps to fold the result of a subtensor (or source of a
/// subtensor_insert) operation with given offsets, and sizes to its
/// Get the reassociation maps to fold the result of an extract_slice (or source
/// of an insert_slice) operation with given offsets and sizes to its
/// rank-reduced version. This is only done for the cases where the size is 1
/// and offset is 0. Strictly speaking the offset 0 is not required in general,
/// but non-zero offsets are not handled by SPIR-V backend at this point (and
@ -486,41 +487,41 @@ getReassociationMapForFoldingUnitDims(ArrayRef<OpFoldResult> mixedSizes) {
}
namespace {
/// Convert `subtensor` operations to rank-reduced versions.
struct UseRankReducedSubTensorOp : public OpRewritePattern<SubTensorOp> {
using OpRewritePattern<SubTensorOp>::OpRewritePattern;
/// Convert `extract_slice` operations to rank-reduced versions.
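///
/// A hedged sketch of the rewrite (shapes are hypothetical): unit dimensions
/// with offset 0 are dropped from the slice result and re-introduced by an
/// expanding reshape, e.g.
///
/// ```
///   %0 = tensor.extract_slice %t[0, %o, 0, 0] [1, 4, 1, 8] [1, 1, 1, 1]
///       : tensor<1x?x1x8xf32> to tensor<1x4x1x8xf32>
/// ```
///
/// becomes
///
/// ```
///   %0 = tensor.extract_slice %t[0, %o, 0, 0] [1, 4, 1, 8] [1, 1, 1, 1]
///       : tensor<1x?x1x8xf32> to tensor<4x8xf32>
///   %1 = linalg.tensor_expand_shape %0 [[0, 1], [2, 3]]
///       : tensor<4x8xf32> into tensor<1x4x1x8xf32>
/// ```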
struct UseRankReducedExtractSliceOp
: public OpRewritePattern<tensor::ExtractSliceOp> {
using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SubTensorOp subTensorOp,
LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
PatternRewriter &rewriter) const override {
RankedTensorType resultType = subTensorOp.getType();
SmallVector<OpFoldResult> offsets = subTensorOp.getMixedOffsets();
SmallVector<OpFoldResult> sizes = subTensorOp.getMixedSizes();
SmallVector<OpFoldResult> strides = subTensorOp.getMixedStrides();
RankedTensorType resultType = sliceOp.getType();
SmallVector<OpFoldResult> offsets = sliceOp.getMixedOffsets();
SmallVector<OpFoldResult> sizes = sliceOp.getMixedSizes();
SmallVector<OpFoldResult> strides = sliceOp.getMixedStrides();
auto reassociation = getReassociationMapForFoldingUnitDims(sizes);
if (!reassociation ||
reassociation->size() == static_cast<size_t>(resultType.getRank()))
return failure();
auto rankReducedType =
SubTensorOp::inferRankReducedResultType(reassociation->size(),
subTensorOp.getSourceType(),
offsets, sizes, strides)
.cast<RankedTensorType>();
auto rankReducedType = tensor::ExtractSliceOp::inferRankReducedResultType(
reassociation->size(), sliceOp.getSourceType(),
offsets, sizes, strides)
.cast<RankedTensorType>();
Location loc = subTensorOp.getLoc();
Value newSubTensor = rewriter.create<SubTensorOp>(
loc, rankReducedType, subTensorOp.source(), offsets, sizes, strides);
rewriter.replaceOpWithNewOp<TensorExpandShapeOp>(
subTensorOp, resultType, newSubTensor, *reassociation);
Location loc = sliceOp.getLoc();
Value newSlice = rewriter.create<tensor::ExtractSliceOp>(
loc, rankReducedType, sliceOp.source(), offsets, sizes, strides);
rewriter.replaceOpWithNewOp<TensorExpandShapeOp>(sliceOp, resultType,
newSlice, *reassociation);
return success();
}
};
/// Convert `subtensor_insert` operations to rank-reduced versions.
struct UseRankReducedSubTensorInsertOp
: public OpRewritePattern<SubTensorInsertOp> {
using OpRewritePattern<SubTensorInsertOp>::OpRewritePattern;
/// Convert `insert_slice` operations to rank-reduced versions.
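///
/// A hedged sketch of the rewrite (shapes are hypothetical): the source is
/// collapsed along its unit dimensions and the collapsed value is inserted
/// with the original offsets, sizes and strides, e.g.
///
/// ```
///   %0 = linalg.tensor_collapse_shape %src [[0, 1], [2, 3]]
///       : tensor<1x4x1x8xf32> into tensor<4x8xf32>
///   %1 = tensor.insert_slice %0 into %dst[0, %o, 0, 0] [1, 4, 1, 8] [1, 1, 1, 1]
///       : tensor<4x8xf32> into tensor<1x?x1x8xf32>
/// ```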
struct UseRankReducedInsertSliceOp
: public OpRewritePattern<tensor::InsertSliceOp> {
using OpRewritePattern<tensor::InsertSliceOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SubTensorInsertOp insertOp,
LogicalResult matchAndRewrite(tensor::InsertSliceOp insertOp,
PatternRewriter &rewriter) const override {
RankedTensorType sourceType = insertOp.getSourceType();
SmallVector<OpFoldResult> offsets = insertOp.getMixedOffsets();
@ -533,7 +534,7 @@ struct UseRankReducedSubTensorInsertOp
Location loc = insertOp.getLoc();
auto reshapedSource = rewriter.create<TensorCollapseShapeOp>(
loc, insertOp.source(), *reassociation);
rewriter.replaceOpWithNewOp<SubTensorInsertOp>(
rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
insertOp, reshapedSource, insertOp.dest(), insertOp.getMixedOffsets(),
insertOp.getMixedSizes(), insertOp.getMixedStrides());
return success();
@ -546,8 +547,9 @@ struct UseRankReducedSubTensorInsertOp
void mlir::linalg::populateFoldUnitExtentDimsPatterns(
RewritePatternSet &patterns) {
auto *context = patterns.getContext();
patterns.add<FoldUnitDimLoops, ReplaceUnitExtents, UseRankReducedSubTensorOp,
UseRankReducedSubTensorInsertOp>(context);
patterns.add<FoldUnitDimLoops, ReplaceUnitExtents,
UseRankReducedExtractSliceOp, UseRankReducedInsertSliceOp>(
context);
TensorCollapseShapeOp::getCanonicalizationPatterns(patterns, context);
TensorExpandShapeOp::getCanonicalizationPatterns(patterns, context);
}

View File

@ -48,8 +48,8 @@ using llvm::dbgs;
/// are 2 cases:
/// a) buffer case: use the SSA value of the views and a simple alias
/// analysis on subview ops to determine producer-consumer dependences;
/// b) tensor case: use SSA use-def chains on subtensor ops;
/// 2. greedily fuse the linalg ops that produce the subview/subtensor.
/// b) tensor case: use SSA use-def chains on extract_slice ops;
/// 2. greedily fuse the linalg ops that produce the subview/extract_slice.
/// 3. inspect the fused ops and determine whether they have other remaining
/// LinalgOp uses. If not, then erase the original producing linalg op.
///
@ -73,13 +73,14 @@ getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth,
// Extract the subranges from the linearized ranges.
for (OpOperand *opOperand : op.getInputAndOutputOperands()) {
// The method `getRangeFromOperandShape` requires using SubViewOp or
// SubTensorOps. If the value isnt defined from there continue.
// ExtractSliceOps. If the value isn't defined from there, continue.
// TODO: The method should be adapted to get the values from
// `ViewInterface`. The interface needs a `getOrCreateRanges` method which
// currently returns a `linalg.range`. The fix here is to move this op to
// `std` dialect and add the method to `ViewInterface`.
if (fromSubViewOpOnly && !isa_and_nonnull<memref::SubViewOp, SubTensorOp>(
opOperand->get().getDefiningOp()))
if (fromSubViewOpOnly &&
!isa_and_nonnull<memref::SubViewOp, tensor::ExtractSliceOp>(
opOperand->get().getDefiningOp()))
continue;
AffineMap map = op.getTiedIndexingMap(opOperand);
@ -221,7 +222,7 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
SmallVector<int64_t, 4> staticSizesVector(rank, ShapedType::kDynamicSize);
SmallVector<int64_t, 4> staticStridesVector(
rank, ShapedType::kDynamicStrideOrOffset);
resultTypes.push_back(SubTensorOp::inferResultType(
resultTypes.push_back(tensor::ExtractSliceOp::inferResultType(
t.cast<RankedTensorType>(), staticOffsetsVector, staticSizesVector,
staticStridesVector));
}
@ -252,15 +253,15 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
}
/// Get the loop range for a dimension `dim` based on the `shapedOperand`. It is
/// expected to be defined by a subview op or a subtensor op.
/// expected to be defined by a subview op or an extract_slice op.
static Range getRangeFromOperandShape(OpBuilder &b, Location loc,
Value shapedOperand, unsigned dim) {
Operation *shapeProducingOp = shapedOperand.getDefiningOp();
if (auto subViewOp = dyn_cast<memref::SubViewOp>(shapeProducingOp))
return subViewOp.getOrCreateRanges(b, loc)[dim];
if (auto subTensorOp = dyn_cast<SubTensorOp>(shapeProducingOp))
return subTensorOp.getOrCreateRanges(b, loc)[dim];
llvm_unreachable("SubviewOp or SubTensorOp expected");
if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(shapeProducingOp))
return sliceOp.getOrCreateRanges(b, loc)[dim];
llvm_unreachable("SubviewOp or ExtractSliceOp expected");
}
/// Fuses the producer into the loop immediately enclosing the consumer.
@ -439,8 +440,8 @@ mlir::linalg::fuseProducerOfBuffer(OpBuilder &b, OpOperand &consumerOpOperand,
if (!producerMap)
return llvm::None;
// Must be a subview or a slice to guarantee there are loops we can fuse
// into.
// Must be a subview or an extract_slice to guarantee there are loops we can
// fuse into.
auto subView = consumerOpOperand.get().getDefiningOp<memref::SubViewOp>();
if (!subView) {
LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subview)");
@ -473,8 +474,8 @@ static void getProducerOfTensor(Value tensor, OpResult &opResult) {
opResult = tensor.cast<OpResult>();
return;
}
if (auto subTensorOp = tensor.getDefiningOp<SubTensorOp>()) {
tensor = subTensorOp.source();
if (auto sliceOp = tensor.getDefiningOp<tensor::ExtractSliceOp>()) {
tensor = sliceOp.source();
continue;
}
if (auto blockArg = tensor.dyn_cast<BlockArgument>()) {
@ -512,11 +513,11 @@ mlir::linalg::fuseProducerOfTensor(OpBuilder &b, OpResult producerOpResult,
Value inputTensor = consumerOpOperand.get();
// Must be a subtensor to guarantee there are loops we can fuse into.
auto subTensor = inputTensor.getDefiningOp<SubTensorOp>();
if (!subTensor) {
// Must be an extract_slice op to guarantee there are loops we can fuse into.
auto sliceOp = inputTensor.getDefiningOp<tensor::ExtractSliceOp>();
if (!sliceOp) {
LLVM_DEBUG(llvm::dbgs()
<< "\nNot fusable, not a subtensor: " << inputTensor);
<< "\nNot fusable, not an extract_slice op: " << inputTensor);
return {};
}

View File

@ -19,6 +19,7 @@
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/SCF/Utils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/Dialect/Vector/VectorUtils.h"
#include "mlir/IR/BuiltinOps.h"
@ -42,13 +43,13 @@ namespace {
/// instructions that need to be hoisted too.
struct HoistableWrite {
vector::TransferWriteOp transferWriteOp;
SubTensorInsertOp subTensorInsertOp;
tensor::InsertSliceOp insertSliceOp;
};
/// Represents a unit of hoistable TransferReadOp. This may comprise other
/// instructions that need to be hoisted too.
struct HoistableRead {
vector::TransferReadOp transferReadOp;
SubTensorOp subTensorOp;
tensor::ExtractSliceOp extractSliceOp;
};
} // namespace
@ -71,7 +72,8 @@ static bool isEqualOffsetSizeOrStride(OpFoldResult op1, OpFoldResult op2) {
}
/// Return true if all offsets, sizes and strides are equal.
static bool sameOffsetsSizesAndStrides(SubTensorOp s, SubTensorInsertOp si) {
static bool sameOffsetsSizesAndStrides(tensor::ExtractSliceOp s,
tensor::InsertSliceOp si) {
if (s.static_offsets().size() != si.static_offsets().size())
return false;
if (s.static_sizes().size() != si.static_sizes().size())
@ -99,38 +101,37 @@ static HoistableRead findMatchingTransferRead(HoistableWrite write,
LLVM_DEBUG(DBGS() << "findMatchingTransferRead for: "
<< *write.transferWriteOp.getOperation() << "\n");
if (write.subTensorInsertOp)
LLVM_DEBUG(DBGS() << "findMatchingTransferRead subTensorInsertOp: "
<< *write.subTensorInsertOp.getOperation() << "\n");
if (write.insertSliceOp)
LLVM_DEBUG(DBGS() << "findMatchingTransferRead inserSliceOp: "
<< *write.insertSliceOp.getOperation() << "\n");
for (Operation *user : srcTensor.getUsers()) {
LLVM_DEBUG(DBGS() << "findMatchingTransferRead inspect user: " << *user
<< "\n");
// If HoistableWrite involves a SubTensorInsertOp, we need to find a
// matching SubTensorOp.
SubTensorOp subTensorOp;
// If HoistableWrite involves an InsertSliceOp, we need to find a
// matching ExtractSliceOp.
tensor::ExtractSliceOp sliceOp;
Operation *maybeTransferReadUser = user;
if (write.subTensorInsertOp) {
subTensorOp = dyn_cast<SubTensorOp>(user);
if (!subTensorOp || subTensorOp.getResult().getType() !=
write.subTensorInsertOp.source().getType())
if (write.insertSliceOp) {
sliceOp = dyn_cast<tensor::ExtractSliceOp>(user);
if (!sliceOp || sliceOp.getResult().getType() !=
write.insertSliceOp.source().getType())
continue;
LLVM_DEBUG(DBGS() << "check whether sameOffsetsSizesAndStrides: "
<< *subTensorOp << " vs " << *write.subTensorInsertOp
<< "\n");
if (!sameOffsetsSizesAndStrides(subTensorOp, write.subTensorInsertOp))
<< *sliceOp << " vs " << *write.insertSliceOp << "\n");
if (!sameOffsetsSizesAndStrides(sliceOp, write.insertSliceOp))
continue;
LLVM_DEBUG(DBGS() << "sameOffsetsSizesAndStrides: SUCCESS\n");
// If we got here, subTensorOp is hoistable iff it has exactly 2 uses:
// If we got here, sliceOp is hoistable iff it has exactly 2 uses:
// 1. the transfer_write we want to hoist.
// 2. a matching transfer_read.
// Anything else, we skip.
bool skip = false;
Operation *otherUser = nullptr;
for (Operation *u : subTensorOp->getUsers()) {
for (Operation *u : sliceOp->getUsers()) {
if (u == write.transferWriteOp)
continue;
if (otherUser) {
@ -149,7 +150,7 @@ static HoistableRead findMatchingTransferRead(HoistableWrite write,
auto read = dyn_cast<vector::TransferReadOp>(maybeTransferReadUser);
if (read && read.indices() == write.transferWriteOp.indices() &&
read.getVectorType() == write.transferWriteOp.getVectorType())
return HoistableRead{read, subTensorOp};
return HoistableRead{read, sliceOp};
}
return HoistableRead();
}
@ -168,13 +169,13 @@ static bool tensorChunkAccessedByUnknownOp(HoistableWrite write,
Operation *user = use.getOwner();
// Skip the candidate use, only inspect the "other" uses.
if (user == candidateRead.transferReadOp ||
user == candidateRead.subTensorOp || user == write.transferWriteOp ||
user == write.subTensorInsertOp)
user == candidateRead.extractSliceOp ||
user == write.transferWriteOp || user == write.insertSliceOp)
continue;
// Consider all transitive uses through a subtensor / subtensor_insert.
// Consider all transitive uses through an extract_slice / insert_slice.
// TODO: atm we just bail because a stronger analysis is needed for these
// cases.
if (isa<SubTensorOp, SubTensorInsertOp>(user))
if (isa<tensor::ExtractSliceOp, tensor::InsertSliceOp>(user))
return true;
// Consider all transitive uses through a vector.transfer_write.
if (auto writeUser = dyn_cast<vector::TransferWriteOp>(user)) {
@ -214,7 +215,7 @@ static bool tensorChunkAccessedByUnknownOp(HoistableWrite write,
/// Return the `forOp`-invariant HoistableWrite that produces `yieldOperand`.
/// Return the null HoistableWrite() if it is not comprised of a
/// vector.transfer_write + optional subtensor_insert or if any of the indexings
/// vector.transfer_write + optional insert_slice or if any of the indexings
/// is `forOp`-dependent.
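///
/// A hedged sketch of the kind of pattern this looks for (all SSA names and
/// types are illustrative only):
///
/// ```
///   %r = scf.for %i = %lb to %ub step %s iter_args(%t = %init)
///       -> (tensor<8x16xf32>) {
///     %st = tensor.extract_slice %t[0, 0] [4, 4] [1, 1]
///         : tensor<8x16xf32> to tensor<4x4xf32>
///     %w = vector.transfer_write %vec, %st[%c0, %c0]
///         : vector<4x4xf32>, tensor<4x4xf32>
///     %ins = tensor.insert_slice %w into %t[0, 0] [4, 4] [1, 1]
///         : tensor<4x4xf32> into tensor<8x16xf32>
///     scf.yield %ins : tensor<8x16xf32>
///   }
/// ```
///
/// Here the insert_slice feeding the yield, together with the transfer_write
/// that defines its source, forms a HoistableWrite because none of the
/// indexings depend on %i.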
static HoistableWrite
getLoopInvariantTransferWriteOpDefining(scf::ForOp forOp,
@ -229,26 +230,26 @@ getLoopInvariantTransferWriteOpDefining(scf::ForOp forOp,
return HoistableWrite{write, nullptr};
}
if (auto subTensorInsertOp = v.getDefiningOp<SubTensorInsertOp>()) {
// Inserted subTensor must come from vector.transfer_write.
if (auto insertSliceOp = v.getDefiningOp<tensor::InsertSliceOp>()) {
// Inserted slice must come from vector.transfer_write.
auto write =
subTensorInsertOp.source().getDefiningOp<vector::TransferWriteOp>();
insertSliceOp.source().getDefiningOp<vector::TransferWriteOp>();
if (!write)
return HoistableWrite();
// Tensor inserted into must be a BBArg at position matching yieldOperand's.
auto bbArg = subTensorInsertOp.dest().dyn_cast<BlockArgument>();
auto bbArg = insertSliceOp.dest().dyn_cast<BlockArgument>();
if (!bbArg || bbArg.getOwner()->getParentOp() != forOp ||
bbArg.getArgNumber() != /*num iv=*/1 + yieldOperand.getOperandNumber())
return HoistableWrite();
// Indexing inserted into must not depend on `forOp`.
for (Value operand : subTensorInsertOp->getOperands().drop_front(
SubTensorInsertOp::getOffsetSizeAndStrideStartOperandIndex()))
for (Value operand : insertSliceOp->getOperands().drop_front(
tensor::InsertSliceOp::getOffsetSizeAndStrideStartOperandIndex()))
if (!forOp.isDefinedOutsideOfLoop(operand))
return HoistableWrite();
return HoistableWrite{write, subTensorInsertOp};
return HoistableWrite{write, insertSliceOp};
}
return HoistableWrite();
@ -260,18 +261,18 @@ static void hoistReadWrite(HoistableRead read, HoistableWrite write,
scf::ForOp forOp = cast<scf::ForOp>(tensorBBArg.getOwner()->getParentOp());
assert(read.transferReadOp && write.transferWriteOp &&
"expected transfer_read and transfer_write ops to be set");
assert(((read.subTensorOp && write.subTensorInsertOp) ||
(!read.subTensorOp && !write.subTensorInsertOp)) &&
"expected matching subtensor / subtensor_insert");
assert(((read.extractSliceOp && write.insertSliceOp) ||
(!read.extractSliceOp && !write.insertSliceOp)) &&
"expected matching extract_slice / insert_slice");
LLVM_DEBUG(DBGS() << "In forOp:\n"
<< *forOp.getOperation()
<< "\nHoist: " << *read.transferReadOp.getOperation()
<< "\nHoist: " << *write.transferWriteOp.getOperation()
<< "\nInvolving: " << tensorBBArg << "\n");
// If a read subtensor is present, hoist it.
if (read.subTensorOp && failed(forOp.moveOutOfLoop({read.subTensorOp})))
llvm_unreachable("Unexpected failure moving subtensor out of loop");
// If a read slice is present, hoist it.
if (read.extractSliceOp && failed(forOp.moveOutOfLoop({read.extractSliceOp})))
llvm_unreachable("Unexpected failure moving extract_slice out of loop");
// Hoist the transfer_read op.
if (failed(forOp.moveOutOfLoop({read.transferReadOp})))
@ -282,20 +283,20 @@ static void hoistReadWrite(HoistableRead read, HoistableWrite write,
unsigned initArgNumber = tensorBBArg.getArgNumber() - /*numIvs=*/1;
// Update the source tensor.
if (read.subTensorOp)
read.subTensorOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]);
if (read.extractSliceOp)
read.extractSliceOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]);
else
read.transferReadOp.sourceMutable().assign(forOp.initArgs()[initArgNumber]);
// Hoist write after.
if (write.subTensorInsertOp)
write.subTensorInsertOp->moveAfter(forOp);
if (write.insertSliceOp)
write.insertSliceOp->moveAfter(forOp);
write.transferWriteOp->moveAfter(forOp);
// Update the yield.
auto yieldOp = cast<scf::YieldOp>(forOp.region().front().getTerminator());
if (write.subTensorInsertOp)
yieldOp->setOperand(initArgNumber, write.subTensorInsertOp.dest());
if (write.insertSliceOp)
yieldOp->setOperand(initArgNumber, write.insertSliceOp.dest());
else
yieldOp->setOperand(initArgNumber, write.transferWriteOp.source());
@ -306,13 +307,13 @@ static void hoistReadWrite(HoistableRead read, HoistableWrite write,
// Transfer write has been hoisted, need to update the vector and tensor
// source. Replace the result of the loop to use the new tensor created
// outside the loop.
// Depending on whether a subtensor_insert is present or not, it carries the
// Depending on whether an insert_slice is present or not, it carries the
// update on the tensor operands.
if (write.subTensorInsertOp) {
if (write.insertSliceOp) {
newForOp.getResult(initArgNumber)
.replaceAllUsesWith(write.subTensorInsertOp.getResult());
write.transferWriteOp.sourceMutable().assign(read.subTensorOp.result());
write.subTensorInsertOp.destMutable().assign(read.subTensorOp.source());
.replaceAllUsesWith(write.insertSliceOp.getResult());
write.transferWriteOp.sourceMutable().assign(read.extractSliceOp.result());
write.insertSliceOp.destMutable().assign(read.extractSliceOp.source());
} else {
newForOp.getResult(initArgNumber)
.replaceAllUsesWith(write.transferWriteOp.getResult(0));
@ -350,9 +351,9 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(FuncOp func) {
LLVM_DEBUG(dbgs() << "\n";
DBGS() << "Candidate write for hoisting: "
<< *write.transferWriteOp.getOperation() << "\n");
if (write.subTensorInsertOp)
LLVM_DEBUG(DBGS() << "Candidate subtensor_insert for hoisting: "
<< *write.subTensorInsertOp.getOperation() << "\n");
if (write.insertSliceOp)
LLVM_DEBUG(DBGS() << "Candidate insert_slice for hoisting: "
<< *write.insertSliceOp.getOperation() << "\n");
if (llvm::any_of(write.transferWriteOp.indices(),
[&forOp](Value index) {
return !forOp.isDefinedOutsideOfLoop(index);
@ -788,8 +789,8 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
// The implementation proceeds in a stack-like fashion:
// 1. Iteratively clone and step into the loops, pushing the `packedTensor`
// deeper in the stack.
// 2. Create a SubTensorInsert at the top of the stack.
// 3. Iteratively pop and yield the result of the SubTensorInsertOp across
// 2. Create an InsertSliceOp at the top of the stack.
// 3. Iteratively pop and yield the result of the InsertSliceOp across
// the cloned loops.
SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;
clonedLoopIvs.reserve(nLoops);
@ -799,10 +800,10 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
backwardSlice.insert(padTensorOp);
// Stack step 1. iteratively clone loops and push `packedTensor`.
for (Operation *op : backwardSlice) {
// Specifically sit out in the subtenso(packedTensor) case: this is the
// Specifically sit out in the extract_slice(packedTensor) case: this is the
// piece we seek to replace.
if (auto subTensor = dyn_cast<SubTensorOp>(op))
if (bvm.lookupOrDefault(subTensor.source()) == packedTensor)
if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op))
if (bvm.lookupOrDefault(sliceOp.source()) == packedTensor)
continue;
auto effects = dyn_cast<MemoryEffectOpInterface>(op);
bool hasNoEffects = !effects || effects.hasNoEffect();
@ -839,7 +840,7 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
packedTensor = clonedForOp.getRegionIterArgs().front();
}
// Stack step 2. create SubTensorInsertOp at the top of the stack.
// Stack step 2. create InsertSliceOp at the top of the stack.
// offsets = [clonedLoopIvs, 0 .. 0].
SmallVector<OpFoldResult> offsets(leadingPackedTensorIndexings.begin(),
leadingPackedTensorIndexings.end());
@ -856,8 +857,8 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
SmallVector<OpFoldResult> strides(nLoops + paddedRank, b.getIndexAttr(1));
Value inserted =
b.create<SubTensorInsertOp>(loc, bvm.lookup(padTensorOp.result()),
packedTensor, offsets, sizes, strides);
b.create<tensor::InsertSliceOp>(loc, bvm.lookup(padTensorOp.result()),
packedTensor, offsets, sizes, strides);
// Stack step 3. iteratively pop the stack and propagate the yield.
Value valueToYield = inserted;
@ -869,7 +870,7 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
}
// Now the packed tensor is ready; replace the original padding op by a
// 1x..x1 SubTensor [originalLoopIvs, 0 .. 0][1 .. 1, paddedShape][1 .. 1].
// 1x..x1 slice [originalLoopIvs, 0 .. 0][1 .. 1, paddedShape][1 .. 1].
b.setInsertionPoint(padTensorOp);
SmallVector<Value> loopIterationCounts =
llvm::to_vector<4>(llvm::map_range(packingLoops, [&](Operation *loop) {
@ -888,8 +889,8 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
packedTensor =
scf::getForInductionVarOwner(clonedLoopIvs.front())->getResult(0);
padTensorOp.replaceAllUsesWith(
b.create<SubTensorOp>(loc, padTensorOp.getResultType(), packedTensor,
offsets, sizes, strides)
b.create<tensor::ExtractSliceOp>(loc, padTensorOp.getResultType(),
packedTensor, offsets, sizes, strides)
->getResult(0));
Operation *toErase = padTensorOp;

View File

@ -254,18 +254,18 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
res = op.clone(b, loc, resultTensorTypes, tiledOperands);
// Insert a subtensor_insert for each output tensor.
// Insert an insert_slice for each output tensor.
unsigned resultIdx = 0;
for (OpOperand *opOperand : op.getOutputTensorOperands()) {
// TODO: use an interface/adaptor to avoid leaking position in
// `tiledOperands`.
Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
tensorResults.push_back(b.create<SubTensorInsertOp>(
loc, subtensor.source().getType(), res->getResult(resultIdx),
subtensor.source(), subtensor.offsets(), subtensor.sizes(),
subtensor.strides(), subtensor.static_offsets(),
subtensor.static_sizes(), subtensor.static_strides()));
if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
tensorResults.push_back(b.create<tensor::InsertSliceOp>(
loc, sliceOp.source().getType(), res->getResult(resultIdx),
sliceOp.source(), sliceOp.offsets(), sliceOp.sizes(),
sliceOp.strides(), sliceOp.static_offsets(), sliceOp.static_sizes(),
sliceOp.static_strides()));
} else {
tensorResults.push_back(res->getResult(resultIdx));
}
@ -406,7 +406,7 @@ void mlir::linalg::populateLinalgTilingCanonicalizationPatterns(
scf::ForOp::getCanonicalizationPatterns(patterns, ctx);
scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx);
SubTensorOp::getCanonicalizationPatterns(patterns, ctx);
tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
memref::SubViewOp::getCanonicalizationPatterns(patterns, ctx);
tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
memref::ViewOp::getCanonicalizationPatterns(patterns, ctx);

View File

@ -16,6 +16,7 @@
#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
@ -128,14 +129,13 @@ static LogicalResult padOperandToSmallestStaticBoundingBox(
// Already static shape, no need to pad.
if (llvm::none_of(opToPad.getShape(opOperand), ShapedType::isDynamic))
return success();
auto subtensor = opOperand->get().getDefiningOp<SubTensorOp>();
// Not a subtensor, cannot construct a static bounding box.
if (!subtensor)
auto sliceOp = opOperand->get().getDefiningOp<tensor::ExtractSliceOp>();
// Not a slice op, cannot construct a static bounding box.
if (!sliceOp)
return failure();
SmallVector<int64_t> staticSizes;
staticSizes.reserve(opToPad.getRank(opOperand));
auto shapedOp =
cast<OffsetSizeAndStrideOpInterface>(subtensor.getOperation());
auto shapedOp = cast<OffsetSizeAndStrideOpInterface>(sliceOp.getOperation());
for (auto size : shapedOp.getMixedSizes()) {
auto indexAttr = size.is<Attribute>()
? size.get<Attribute>().dyn_cast<IntegerAttr>()
@ -195,8 +195,8 @@ static LogicalResult rewriteAsPaddedOp(PatternRewriter &rewriter,
linalg::LinalgOp paddedOp =
opToPad.clone(rewriter, loc, resultTensorTypes, newOperands);
// Recover the subtensor out of the new static results. This keeps the
// original linalg op around because it uses the dims of the original results.
// Recover the slice out of the new static results. This keeps the original
// linalg op around because it uses the dims of the original results.
// This later folds away.
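// For illustration (hypothetical types): if the padded op now produces a
// static tensor<8x8xf32> while the original result type was tensor<?x?xf32>,
// the slice created below, e.g.
//   tensor.extract_slice %padded[0, 0] [%d0, %d1] [1, 1]
//       : tensor<8x8xf32> to tensor<?x?xf32>
// recovers a value of the original type, where %d0/%d1 are the dims of the
// original result.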
SmallVector<Value> paddedSubviewResults;
paddedSubviewResults.reserve(opToPad->getNumResults());
@ -211,7 +211,7 @@ static LogicalResult rewriteAsPaddedOp(PatternRewriter &rewriter,
return dimOp.getResult();
}));
SmallVector<OpFoldResult> strides(rank, rewriter.getIndexAttr(1));
paddedSubviewResults.push_back(rewriter.create<SubTensorOp>(
paddedSubviewResults.push_back(rewriter.create<tensor::ExtractSliceOp>(
loc, std::get<1>(it), offsets, sizes, strides));
}
// Replace the transient `opToPad` locally, except for uses that we just
@ -679,7 +679,7 @@ LogicalResult PadTensorOpTransformationPattern::matchAndRewrite(
rewriter.create<linalg::FillOp>(loc, initTensor, padValue).result();
// Copy original contents into new tensor
// Uses linalg.generic, but could be done with std.subtensor_insert
// Uses linalg.generic, but could be done with tensor.insert_slice
SmallVector<AffineExpr, 4> outputExprs;
for (unsigned i = 0; i < resultShapedType.getRank(); ++i) {
outputExprs.push_back(getAffineDimExpr(i, rewriter.getContext()) +
@ -719,13 +719,13 @@ static OpFoldResult asOpFoldResult(OpBuilder &builder, Value val) {
return val;
}
LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
SubTensorOp subTensorOp, PatternRewriter &rewriter) const {
auto padOp = subTensorOp.source().getDefiningOp<PadTensorOp>();
LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
auto padOp = sliceOp.source().getDefiningOp<PadTensorOp>();
if (!padOp)
return failure();
// Only unit stride supported.
if (!subTensorOp.hasUnitStride())
if (!sliceOp.hasUnitStride())
return failure();
// Only constant padding value supported.
Value padValue = padOp.getConstantPaddingValue();
@ -734,7 +734,7 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
// Helper variables and functions for various arithmetic operations. These are
// used extensively for computing new offset/length and padding values.
Location loc = subTensorOp.getLoc();
Location loc = sliceOp.getLoc();
AffineExpr dim0, dim1;
bindDims(rewriter.getContext(), dim0, dim1);
// Add two integers.
@ -786,8 +786,8 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
int64_t rank = padOp.getSourceType().getRank();
for (unsigned dim = 0; dim < rank; ++dim) {
auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]);
auto offset = asValue(rewriter, loc, subTensorOp.getMixedOffsets()[dim]);
auto length = asValue(rewriter, loc, subTensorOp.getMixedSizes()[dim]);
auto offset = asValue(rewriter, loc, sliceOp.getMixedOffsets()[dim]);
auto length = asValue(rewriter, loc, sliceOp.getMixedSizes()[dim]);
auto srcSize = rewriter.createOrFold<memref::DimOp>(
loc, padOp.source(), dim);
@ -805,19 +805,19 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
//
// The original read could also have started in the high padding zone.
// In that case, set the offset to the end of the source tensor. The new
// SubTensorOp length will be zero in that case. (Effectively reading no
// ExtractSliceOp length will be zero in that case. (Effectively reading no
// data from the source.)
Value newOffset = min(max(sub(offset, low), zero), srcSize);
newOffsets.push_back(asOpFoldResult(rewriter, newOffset));
// The original SubTensorOp was reading until position `offset + length`.
// The original ExtractSliceOp was reading until position `offset + length`.
// Therefore, the corresponding position within the source tensor is:
//
// offset + length - low
//
// In case the original SubTensorOp stopped reading within the low padding
// zone, this value can be negative. In that case, the end position of the
// read should be zero. (Similar to newOffset.)
// In case the original ExtractSliceOp stopped reading within the low
// padding zone, this value can be negative. In that case, the end position
// of the read should be zero. (Similar to newOffset.)
//
// The original read could also have stopped in the high padding zone.
// In that case, the end position of the read should be the end of the
@ -825,7 +825,7 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
//
// endLoc = min(max(offset - low + length, 0), srcSize)
//
// The new SubTensorOp length is `endLoc - newOffset`.
// The new ExtractSliceOp length is `endLoc - newOffset`.
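//
// For example (hypothetical numbers): with offset = 1, length = 4, low = 3
// and srcSize = 2, newOffset = min(max(1 - 3, 0), 2) = 0 and
// endLoc = min(max(1 - 3 + 4, 0), 2) = 2, so newLength = 2: two elements are
// read from the source and the remaining two come from the low padding.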
Value endLoc = min(max(add(sub(offset, low), length), zero), srcSize);
Value newLength = sub(endLoc, newOffset);
newLengths.push_back(asOpFoldResult(rewriter, newLength));
@ -842,7 +842,7 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
}
// The amount of high padding is simply the number of elements remaining,
// so that the result has the same length as the original SubTensorOp.
// so that the result has the same length as the original ExtractSliceOp.
Value newHigh = sub(sub(length, newLength), newLow);
appendIndex(newHigh, newHighs, staticNewHighs);
@ -852,22 +852,20 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
// Insert cast to ensure that types match. (May be folded away.)
auto castResult = [&](Value val) -> Value {
auto castOp = rewriter.create<tensor::CastOp>(
loc, subTensorOp.getType(), val);
auto castOp = rewriter.create<tensor::CastOp>(loc, sliceOp.getType(), val);
return castOp;
};
// In cases where the original data source is unused: Emit a GenerateOp and
// do not generate a SubTensorOp. (The result shape of the SubTensorOp would
// do not generate a SliceOp. (The result shape of the SliceOp would
// have a dimension of size 0, the semantics of which is unclear.)
auto createGenerateOp = [&]() {
// The shape of the GenerateOp is the same as the existing SubTensorOp.
RankedTensorType type = subTensorOp.getType();
// The shape of the GenerateOp is the same as the existing SliceOp.
RankedTensorType type = sliceOp.getType();
SmallVector<Value> dynDims;
for (unsigned i = 0; i < type.getRank(); ++i) {
if (type.isDynamicDim(i))
dynDims.push_back(
asValue(rewriter, loc, subTensorOp.getMixedOffsets()[i]));
dynDims.push_back(asValue(rewriter, loc, sliceOp.getMixedOffsets()[i]));
}
// Create GenerateOp.
@ -891,14 +889,14 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
return castResult(generateOp);
};
// Emit a SubTensorOp and a PadTensorOp. Should not be used in cases where
// the result shape of the new SubTensorOp has a zero dimension.
// Emit a SliceOp and a PadTensorOp. Should not be used in cases where
// the result shape of the new SliceOp has a zero dimension.
auto createPadTensorOfSubTensor = [&]() {
// Create pad_tensor(subtensor(x)).
auto newSubTensorOp = rewriter.create<SubTensorOp>(
auto newSliceOp = rewriter.create<tensor::ExtractSliceOp>(
loc, padOp.source(), newOffsets, newLengths, newStrides);
auto newPadTensorOp = rewriter.create<PadTensorOp>(
loc, newSubTensorOp, staticNewLows, staticNewHighs, newLows, newHighs);
loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
// Copy region to new PadTensorOp.
BlockAndValueMapping bvm;
@ -911,27 +909,29 @@ LogicalResult SubTensorOfPadTensorSwapPattern::matchAndRewrite(
// Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically known
// that the original data source x is not used.
if (hasZeroLen) {
rewriter.replaceOp(subTensorOp, createGenerateOp());
rewriter.replaceOp(sliceOp, createGenerateOp());
return success();
}
// If there are dynamic dimensions: Generate an scf.if check to avoid creating
// SubTensorOps with result dimensions of size 0 at runtime.
// SliceOps with result dimensions of size 0 at runtime.
if (dynHasZeroLenCond) {
auto result = rewriter.create<scf::IfOp>(
loc, subTensorOp.getType(), dynHasZeroLenCond,
/*thenBuilder=*/[&](OpBuilder &b, Location loc) {
loc, sliceOp.getType(), dynHasZeroLenCond,
/*thenBuilder=*/
[&](OpBuilder &b, Location loc) {
b.create<scf::YieldOp>(loc, createGenerateOp());
},
/*elseBuilder=*/[&](OpBuilder &b, Location loc) {
/*elseBuilder=*/
[&](OpBuilder &b, Location loc) {
b.create<scf::YieldOp>(loc, createPadTensorOfSubTensor());
});
rewriter.replaceOp(subTensorOp, result.getResult(0));
rewriter.replaceOp(sliceOp, result.getResult(0));
return success();
}
// All shapes are static and the data source is actually used. Rewrite into
// pad_tensor(subtensor(x)).
rewriter.replaceOp(subTensorOp, createPadTensorOfSubTensor());
rewriter.replaceOp(sliceOp, createPadTensorOfSubTensor());
return success();
}

View File

@ -15,6 +15,7 @@
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
@ -677,9 +678,9 @@ static SmallVector<Value> ofrToIndexValues(OpBuilder &builder, Location loc,
}
/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and
/// SubTensorInsertOp. For now, only constant padding values are supported.
/// InsertSliceOp. For now, only constant padding values are supported.
/// If there is enough static type information, TransferReadOps and
/// TransferWriteOps may be generated instead of SubTensorInsertOps.
/// TransferWriteOps may be generated instead of InsertSliceOps.
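///
/// A hedged sketch of the generic rewrite (values, shapes and the exact
/// printed form of linalg.fill are illustrative only):
///
/// ```
///   %0 = linalg.pad_tensor %src low[0, 0] high[2, 3] {
///   ^bb0(%i: index, %j: index):
///     linalg.yield %cst : f32
///   } : tensor<4x5xf32> to tensor<6x8xf32>
/// ```
///
/// becomes, roughly:
///
/// ```
///   %init = linalg.init_tensor [6, 8] : tensor<6x8xf32>
///   %fill = linalg.fill(%init, %cst) : tensor<6x8xf32>, f32 -> tensor<6x8xf32>
///   %0 = tensor.insert_slice %src into %fill[0, 0] [4, 5] [1, 1]
///       : tensor<4x5xf32> into tensor<6x8xf32>
/// ```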
struct GenericPadTensorOpVectorizationPattern
: public OpRewritePattern<PadTensorOp> {
using OpRewritePattern<PadTensorOp>::OpRewritePattern;
@ -723,7 +724,7 @@ struct GenericPadTensorOpVectorizationPattern
return success();
// Neither source type nor PadTensorOp result type has a static shape. Such
// PadTensorOps cannot be vectorized. Generate a SubTensorInsertOp instead
// PadTensorOps cannot be vectorized. Generate an InsertSliceOp instead
// for copying the PadOp source.
auto sourceType = padOp.getSourceType();
@ -737,10 +738,10 @@ struct GenericPadTensorOpVectorizationPattern
srcSizes.push_back(rewriter.getIndexAttr(sourceType.getDimSize(dim)));
}
}
// Strides of SubTensorInsertOp are all 1.
// Strides of InsertSliceOp are all 1.
SmallVector<OpFoldResult> strides(sourceType.getRank(),
rewriter.getIndexAttr(1));
rewriter.replaceOpWithNewOp<SubTensorInsertOp>(
rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
padOp, padOp.source(), fill, padOp.getMixedLowPad(), srcSizes, strides);
return success();
@ -913,27 +914,29 @@ struct PadTensorOpVectorizationWithTransferReadPattern
/// write. In such cases, the TransferWriteOp can write to the non-padded tensor
/// value and apply out-of-bounds masking. E.g.:
/// ```
/// %0 = subtensor ...[...] [%s0, %s1] [1, 1] : tensor<...> to tensor<?x?xf32>
/// %0 = tensor.extract_slice ...[...] [%s0, %s1] [1, 1]
/// : tensor<...> to tensor<?x?xf32>
/// %1 = linalg.pad_tensor %0 ... : tensor<?x?xf32> to tensor<17x5xf32>
/// %2 = vector.transfer_write %vec, %1[...]
/// : vector<17x5xf32>, tensor<17x5xf32>
/// %r = subtensor %2[0, 0] [%s0, %s1] [1, 1]
/// %r = tensor.extract_slice %2[0, 0] [%s0, %s1] [1, 1]
/// : tensor<17x5xf32> to tensor<?x?xf32>
/// ```
/// is rewritten to:
/// ```
/// %0 = subtensor ...[...] [%s0, %s1] [1, 1] : tensor<...> to tensor<?x?xf32>
/// %0 = tensor.extract_slice ...[...] [%s0, %s1] [1, 1]
/// : tensor<...> to tensor<?x?xf32>
/// %r = vector.transfer_write %vec, %0[...] : vector<17x5xf32>, tensor<?x?xf32>
/// ```
/// Note: It is important that the SubTensorOp %r resizes the result of the
/// Note: It is important that the ExtractSliceOp %r resizes the result of the
/// TransferWriteOp to the same size as the input of the PadTensorOp (or an even
/// smaller size). Otherwise, %r's new (dynamic) dimensions would differ from
/// %r's old dimensions.
///
/// This rewrite is possible if:
/// - Low padding is static 0.
/// - `xferOp` has exactly one use, which is a SubTensorOp. This SubTensorOp
/// trims the same amount of padding that was added beforehand.
/// - `xferOp` has exactly one use, which is an ExtractSliceOp. This
/// ExtractSliceOp trims the same amount of padding that was added beforehand.
/// - Single, scalar padding value.
struct PadTensorOpVectorizationWithTransferWritePattern
: public VectorizePadTensorOpUserPattern<vector::TransferWriteOp> {
@ -947,9 +950,9 @@ struct PadTensorOpVectorizationWithTransferWritePattern
// Pad value must be a constant.
auto padValue = padOp.getConstantPaddingValue();
if (!padValue) return failure();
// TransferWriteOp result must be directly consumed by a SubTensorOp.
// TransferWriteOp result must be directly consumed by an ExtractSliceOp.
if (!xferOp->hasOneUse()) return failure();
auto trimPadding = dyn_cast<SubTensorOp>(*xferOp->user_begin());
auto trimPadding = dyn_cast<tensor::ExtractSliceOp>(*xferOp->user_begin());
if (!trimPadding) return failure();
// Only static zero offsets supported when trimming padding.
if (!trimPadding.hasZeroOffset()) return failure();
@ -976,7 +979,8 @@ struct PadTensorOpVectorizationWithTransferWritePattern
/// This is a conservative analysis. In case equal tensor sizes cannot be
/// proven statically, this analysis returns `false` even though the tensor
/// sizes may turn out to be equal at runtime.
bool hasSameTensorSize(Value beforePadding, SubTensorOp afterTrimming) const {
bool hasSameTensorSize(Value beforePadding,
tensor::ExtractSliceOp afterTrimming) const {
// If the input to PadTensorOp is a CastOp, try with both CastOp result
// and CastOp operand.
if (auto castOp = beforePadding.getDefiningOp<tensor::CastOp>())
@ -1002,21 +1006,22 @@ struct PadTensorOpVectorizationWithTransferWritePattern
if (t1.getNumDynamicDims() == 0) return true;
// All dynamic sizes must be the same. The only supported case at the moment
// is when `beforePadding` is a SubTensorOp (or a cast thereof).
// is when `beforePadding` is an ExtractSliceOp (or a cast thereof).
// Apart from CastOp, only SubTensorOp is supported.
auto beforeSubtensor = beforePadding.getDefiningOp<SubTensorOp>();
if (!beforeSubtensor) return false;
// Apart from CastOp, only ExtractSliceOp is supported.
auto beforeSlice = beforePadding.getDefiningOp<tensor::ExtractSliceOp>();
if (!beforeSlice)
return false;
assert(static_cast<size_t>(t1.getRank())
== beforeSubtensor.getMixedSizes().size());
assert(static_cast<size_t>(t1.getRank()) ==
beforeSlice.getMixedSizes().size());
assert(static_cast<size_t>(t2.getRank())
== afterTrimming.getMixedSizes().size());
for (unsigned i = 0; i < t1.getRank(); ++i) {
// Skip static dimensions.
if (!t1.isDynamicDim(i)) continue;
auto size1 = beforeSubtensor.getMixedSizes()[i];
auto size1 = beforeSlice.getMixedSizes()[i];
auto size2 = afterTrimming.getMixedSizes()[i];
// Case 1: Same value or same constant int.
@ -1042,10 +1047,11 @@ struct PadTensorOpVectorizationWithTransferWritePattern
}
};
/// Rewrite use of PadTensorOp result in SubtensorInsertOp. E.g.:
/// Rewrite use of PadTensorOp result in InsertSliceOp. E.g.:
/// ```
/// %0 = linalg.pad_tensor %src ... : tensor<?x?xf32> to tensor<17x5xf32>
/// %r = subtensor_insert %0 into %dest[%a, %b, 0, 0] [1, 1, 17, 5] [1, 1, 1, 1]
/// %r = tensor.insert_slice %0
/// into %dest[%a, %b, 0, 0] [1, 1, 17, 5] [1, 1, 1, 1]
/// : tensor<17x5xf32> into tensor<?x?x17x5xf32>
/// ```
/// is rewritten to:
@ -1063,13 +1069,13 @@ struct PadTensorOpVectorizationWithTransferWritePattern
/// (Implies that sizes of `insertOp` are all static.)
/// - Only unit strides in `insertOp`.
/// - Single, scalar padding value.
struct PadTensorOpVectorizationWithSubTensorInsertPattern
: public VectorizePadTensorOpUserPattern<SubTensorInsertOp> {
using VectorizePadTensorOpUserPattern<SubTensorInsertOp>
::VectorizePadTensorOpUserPattern;
struct PadTensorOpVectorizationWithInsertSlicePattern
: public VectorizePadTensorOpUserPattern<tensor::InsertSliceOp> {
using VectorizePadTensorOpUserPattern<
tensor::InsertSliceOp>::VectorizePadTensorOpUserPattern;
LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp,
SubTensorInsertOp insertOp) const override {
tensor::InsertSliceOp insertOp) const override {
// Low padding must be static 0.
if (!padOp.hasZeroLowPad()) return failure();
// Only unit stride supported.
@ -1103,8 +1109,8 @@ struct PadTensorOpVectorizationWithSubTensorInsertPattern
auto read = rewriter.create<vector::TransferReadOp>(
padOp.getLoc(), vecType, padOp.source(), readIndices, padValue);
// Generate TransferWriteOp: Write to SubTensorInsertOp's dest tensor at
// specified offsets. Write is fully in-bounds because a SubTensorInsertOp's
// Generate TransferWriteOp: Write to InsertSliceOp's dest tensor at
// specified offsets. Write is fully in-bounds because an InsertSliceOp's
// source must fit into the destination at the specified offsets.
auto writeIndices =
ofrToIndexValues(rewriter, padOp.getLoc(), insertOp.getMixedOffsets());
@ -1123,7 +1129,7 @@ void mlir::linalg::populatePadTensorOpVectorizationPatterns(
// Try these specialized patterns first before resorting to the generic one.
patterns.add<PadTensorOpVectorizationWithTransferReadPattern,
PadTensorOpVectorizationWithTransferWritePattern,
PadTensorOpVectorizationWithSubTensorInsertPattern>(
PadTensorOpVectorizationWithInsertSlicePattern>(
patterns.getContext(), baseBenefit.getBenefit() + 1);
}

View File

@ -19,6 +19,7 @@
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineMap.h"
@ -556,7 +557,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
}
LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
// Construct a new subview / subtensor for the tile.
// Construct a new subview / extract_slice for the tile.
SmallVector<OpFoldResult, 4> offsets, sizes, strides;
offsets.reserve(rank);
sizes.reserve(rank);
@ -585,7 +586,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
Value size = makeComposedAffineApply(b, loc, s0 + 1, closedIntSize);
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
// The size of the subview / subtensor should be trimmed to avoid
// The size of the subview / extract_slice should be trimmed to avoid
// out-of-bounds accesses, unless we statically know the subshape size
// divides the shape size evenly.
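// For example (hypothetical numbers): with a tile size of 4, a dimension size
// of 10 and a tile offset %iv, the trimmed size is min(4, 10 - %iv), so the
// last tile only covers the 2 remaining elements.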
int64_t shapeSize = shape[r];
@ -619,8 +620,8 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
tiledShapes.push_back(
b.create<memref::SubViewOp>(loc, shapedOp, offsets, sizes, strides));
else
tiledShapes.push_back(
b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
tiledShapes.push_back(b.create<tensor::ExtractSliceOp>(
loc, shapedOp, offsets, sizes, strides));
}
return tiledShapes;

View File

@ -717,10 +717,10 @@ OpFoldResult DimOp::fold(ArrayRef<Attribute> operands) {
// The size at the given index is now known to be a dynamic size.
unsigned unsignedIndex = index.getValue().getZExtValue();
if (auto subtensor = dyn_cast_or_null<mlir::SubTensorOp>(definingOp)) {
assert(subtensor.isDynamicSize(unsignedIndex) &&
"Expected dynamic subtensor size");
return subtensor.getDynamicSize(unsignedIndex);
if (auto sliceOp = dyn_cast_or_null<tensor::ExtractSliceOp>(definingOp)) {
assert(sliceOp.isDynamicSize(unsignedIndex) &&
"Expected dynamic slice size");
return sliceOp.getDynamicSize(unsignedIndex);
}
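// For illustration (hypothetical values): if the dimension operand is defined
// as
//   %0 = tensor.extract_slice %t[0] [%sz] [1] : tensor<?xf32> to tensor<?xf32>
// then querying dimension 0 of %0 folds directly to %sz, the dynamic size
// operand of the slice.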
// Fold dim to the size argument for an `AllocOp`, `ViewOp`, or `SubViewOp`.
@ -1314,7 +1314,7 @@ void ReinterpretCastOp::build(OpBuilder &b, OperationState &result,
}
// TODO: ponder whether we want to allow missing trailing sizes/strides that are
// completed automatically, like we have for subview and subtensor.
// completed automatically, like we have for subview and extract_slice.
static LogicalResult verify(ReinterpretCastOp op) {
// The source and result memrefs should be in the same memory space.
auto srcType = op.source().getType().cast<BaseMemRefType>();

View File

@ -10,7 +10,6 @@
#include "mlir/Dialect/CommonFolders.h"
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
@ -34,32 +33,6 @@
using namespace mlir;
/// Helper function to dispatch an OpFoldResult into either the `dynamicVec` if
/// it is a Value or into `staticVec` if it is an IntegerAttr.
/// In the case of a Value, a copy of the `sentinel` value is also pushed to
/// `staticVec`. This is useful to extract mixed static and dynamic entries that
/// come from an AttrSizedOperandSegments trait.
static void dispatchIndexOpFoldResult(OpFoldResult ofr,
SmallVectorImpl<Value> &dynamicVec,
SmallVectorImpl<int64_t> &staticVec,
int64_t sentinel) {
if (auto v = ofr.dyn_cast<Value>()) {
dynamicVec.push_back(v);
staticVec.push_back(sentinel);
return;
}
APInt apInt = ofr.dyn_cast<Attribute>().cast<IntegerAttr>().getValue();
staticVec.push_back(apInt.getSExtValue());
}
static void dispatchIndexOpFoldResults(ArrayRef<OpFoldResult> ofrs,
SmallVectorImpl<Value> &dynamicVec,
SmallVectorImpl<int64_t> &staticVec,
int64_t sentinel) {
for (auto ofr : ofrs)
dispatchIndexOpFoldResult(ofr, dynamicVec, staticVec, sentinel);
}
/// If ofr is a constant integer, i.e., an IntegerAttr or a ConstantOp with an
/// IntegerAttr, return the integer.
llvm::Optional<int64_t> mlir::getConstantIntValue(OpFoldResult ofr) {
@ -227,7 +200,6 @@ static void printStandardCastOp(Operation *op, OpAsmPrinter &p) {
}
void StandardOpsDialect::initialize() {
getContext()->loadDialect<tensor::TensorDialect>();
addOperations<
#define GET_OP_LIST
#include "mlir/Dialect/StandardOps/IR/Ops.cpp.inc"
@ -286,14 +258,6 @@ OpFoldResult AddIOp::fold(ArrayRef<Attribute> operands) {
[](APInt a, APInt b) { return a + b; });
}
/// Extract int64_t values from the assumed ArrayAttr of IntegerAttr.
static SmallVector<int64_t, 4> extractFromI64ArrayAttr(Attribute attr) {
return llvm::to_vector<4>(
llvm::map_range(attr.cast<ArrayAttr>(), [](Attribute a) -> int64_t {
return a.cast<IntegerAttr>().getInt();
}));
}
/// Canonicalize a sum of a constant and (constant - something) to simply be
/// a sum of constants minus something. This transformation does similar
/// transformations for additions of a constant with a subtract/add of
@ -2082,499 +2046,6 @@ bool UIToFPOp::areCastCompatible(TypeRange inputs, TypeRange outputs) {
return areVectorCastSimpleCompatible(a, b, areCastCompatible);
}
//===----------------------------------------------------------------------===//
// SubTensorOp
//===----------------------------------------------------------------------===//
/// A subtensor result type can be fully inferred from the source type and the
/// static representation of offsets, sizes and strides. Special sentinels
/// encode the dynamic case.
Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType,
ArrayRef<int64_t> leadingStaticOffsets,
ArrayRef<int64_t> leadingStaticSizes,
ArrayRef<int64_t> leadingStaticStrides) {
  // A subtensor may specify only a leading subset of offsets/sizes/strides, in
  // which case we complete with offset=0, sizes from the source tensor type,
  // and strides=1.
unsigned rank = sourceRankedTensorType.getRank();
assert(leadingStaticSizes.size() <= rank &&
"unexpected leadingStaticSizes overflow");
auto staticSizes = llvm::to_vector<4>(leadingStaticSizes);
unsigned numTrailingSizes = rank - staticSizes.size();
llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back(
numTrailingSizes));
return RankedTensorType::get(staticSizes,
sourceRankedTensorType.getElementType());
}
Type SubTensorOp::inferResultType(RankedTensorType sourceRankedTensorType,
ArrayRef<OpFoldResult> leadingStaticOffsets,
ArrayRef<OpFoldResult> leadingStaticSizes,
ArrayRef<OpFoldResult> leadingStaticStrides) {
SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
staticOffsets, ShapedType::kDynamicStrideOrOffset);
dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
ShapedType::kDynamicSize);
dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
staticStrides, ShapedType::kDynamicStrideOrOffset);
return SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets,
staticSizes, staticStrides);
}
/// A subtensor result type can be fully inferred from the source type and the
/// static representation of offsets, sizes and strides. Special sentinels
/// encode the dynamic case.
Type SubTensorOp::inferRankReducedResultType(
unsigned resultRank, RankedTensorType sourceRankedTensorType,
ArrayRef<int64_t> leadingStaticOffsets,
ArrayRef<int64_t> leadingStaticSizes,
ArrayRef<int64_t> leadingStaticStrides) {
auto inferredType =
inferResultType(sourceRankedTensorType, leadingStaticOffsets,
leadingStaticSizes, leadingStaticStrides)
.cast<RankedTensorType>();
int rankDiff = inferredType.getRank() - resultRank;
if (rankDiff > 0) {
auto shape = inferredType.getShape();
llvm::SmallDenseSet<unsigned> dimsToProject;
mlir::getPositionsOfShapeOne(rankDiff, shape, dimsToProject);
SmallVector<int64_t> projectedShape;
for (unsigned pos = 0, e = shape.size(); pos < e; ++pos)
if (!dimsToProject.contains(pos))
projectedShape.push_back(shape[pos]);
inferredType =
RankedTensorType::get(projectedShape, inferredType.getElementType());
}
return inferredType;
}
Type SubTensorOp::inferRankReducedResultType(
unsigned resultRank, RankedTensorType sourceRankedTensorType,
ArrayRef<OpFoldResult> leadingStaticOffsets,
ArrayRef<OpFoldResult> leadingStaticSizes,
ArrayRef<OpFoldResult> leadingStaticStrides) {
SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
staticOffsets, ShapedType::kDynamicStrideOrOffset);
dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
ShapedType::kDynamicSize);
dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
staticStrides, ShapedType::kDynamicStrideOrOffset);
return SubTensorOp::inferRankReducedResultType(
resultRank, sourceRankedTensorType, staticOffsets, staticSizes,
staticStrides);
}
// Build a SubTensorOp with mixed static and dynamic entries and custom result
// type. If the type passed is nullptr, it is inferred.
void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result,
RankedTensorType resultType, Value source,
ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes,
ArrayRef<OpFoldResult> strides,
ArrayRef<NamedAttribute> attrs) {
SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets,
ShapedType::kDynamicStrideOrOffset);
dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
ShapedType::kDynamicSize);
dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides,
ShapedType::kDynamicStrideOrOffset);
auto sourceRankedTensorType = source.getType().cast<RankedTensorType>();
// Structuring implementation this way avoids duplication between builders.
if (!resultType) {
resultType =
SubTensorOp::inferResultType(sourceRankedTensorType, staticOffsets,
staticSizes, staticStrides)
.cast<RankedTensorType>();
}
build(b, result, resultType, source, dynamicOffsets, dynamicSizes,
dynamicStrides, b.getI64ArrayAttr(staticOffsets),
b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides));
result.addAttributes(attrs);
}
// Build a SubTensorOp with mixed static and dynamic entries and inferred result
// type.
void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result,
Value source, ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes,
ArrayRef<OpFoldResult> strides,
ArrayRef<NamedAttribute> attrs) {
build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
}
// Build a SubTensorOp with dynamic entries and custom result type. If the type
// passed is nullptr, it is inferred.
void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result,
RankedTensorType resultType, Value source,
ValueRange offsets, ValueRange sizes,
ValueRange strides,
ArrayRef<NamedAttribute> attrs) {
SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
build(b, result, resultType, source, offsetValues, sizeValues, strideValues);
}
// Build a SubTensorOp with dynamic entries and inferred result type.
void mlir::SubTensorOp::build(OpBuilder &b, OperationState &result,
Value source, ValueRange offsets,
ValueRange sizes, ValueRange strides,
ArrayRef<NamedAttribute> attrs) {
build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
}
enum SubTensorVerificationResult {
Success,
RankTooLarge,
SizeMismatch,
ElemTypeMismatch,
};
/// Checks if the `original` type can be rank reduced to the `reduced` type.
/// This function is a slight variant of the `is subsequence` algorithm, where
/// the non-matching dimensions must be 1.
static SubTensorVerificationResult
isRankReducedType(Type originalType, Type candidateReducedType,
std::string *errMsg = nullptr) {
if (originalType == candidateReducedType)
return SubTensorVerificationResult::Success;
if (!originalType.isa<RankedTensorType>())
return SubTensorVerificationResult::Success;
if (originalType.isa<RankedTensorType>() &&
!candidateReducedType.isa<RankedTensorType>())
return SubTensorVerificationResult::Success;
ShapedType originalShapedType = originalType.cast<ShapedType>();
ShapedType candidateReducedShapedType =
candidateReducedType.cast<ShapedType>();
// Rank and size logic is valid for all ShapedTypes.
ArrayRef<int64_t> originalShape = originalShapedType.getShape();
ArrayRef<int64_t> candidateReducedShape =
candidateReducedShapedType.getShape();
unsigned originalRank = originalShape.size(),
candidateReducedRank = candidateReducedShape.size();
if (candidateReducedRank > originalRank)
return SubTensorVerificationResult::RankTooLarge;
auto optionalUnusedDimsMask =
computeRankReductionMask(originalShape, candidateReducedShape);
// Sizes cannot be matched in case empty vector is returned.
if (!optionalUnusedDimsMask.hasValue())
return SubTensorVerificationResult::SizeMismatch;
if (originalShapedType.getElementType() !=
candidateReducedShapedType.getElementType())
return SubTensorVerificationResult::ElemTypeMismatch;
// We are done for the tensor case.
if (originalType.isa<RankedTensorType>())
return SubTensorVerificationResult::Success;
return SubTensorVerificationResult::Success;
}
template <typename OpTy>
static LogicalResult
produceSubTensorErrorMsg(SubTensorVerificationResult result, OpTy op,
Type expectedType, StringRef errMsg = "") {
auto memrefType = expectedType.cast<ShapedType>();
switch (result) {
case SubTensorVerificationResult::Success:
return success();
case SubTensorVerificationResult::RankTooLarge:
return op.emitError("expected result rank to be smaller or equal to ")
<< "the source rank. " << errMsg;
case SubTensorVerificationResult::SizeMismatch:
return op.emitError("expected result type to be ")
<< expectedType
<< " or a rank-reduced version. (mismatch of result sizes) "
<< errMsg;
case SubTensorVerificationResult::ElemTypeMismatch:
return op.emitError("expected result element type to be ")
<< memrefType.getElementType() << errMsg;
}
llvm_unreachable("unexpected subtensor verification result");
}
/// Verifier for SubTensorOp.
static LogicalResult verify(SubTensorOp op) {
// Verify result type against inferred type.
auto expectedType = SubTensorOp::inferResultType(
op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()),
extractFromI64ArrayAttr(op.static_sizes()),
extractFromI64ArrayAttr(op.static_strides()));
auto result = isRankReducedType(expectedType, op.getType());
return produceSubTensorErrorMsg(result, op, expectedType);
}
/// Infer the canonical type of the result of a subtensor operation. Returns a
/// type with rank `resultRank` that is either the rank of the rank-reduced
/// type, or the non-rank-reduced type.
static RankedTensorType getCanonicalSubTensorResultType(
unsigned resultRank, RankedTensorType sourceType,
ArrayRef<OpFoldResult> mixedOffsets, ArrayRef<OpFoldResult> mixedSizes,
ArrayRef<OpFoldResult> mixedStrides) {
auto resultType =
SubTensorOp::inferRankReducedResultType(
resultRank, sourceType, mixedOffsets, mixedSizes, mixedStrides)
.cast<RankedTensorType>();
if (resultType.getRank() != resultRank) {
resultType = SubTensorOp::inferResultType(sourceType, mixedOffsets,
mixedSizes, mixedStrides)
.cast<RankedTensorType>();
}
return resultType;
}
namespace {
/// Pattern to rewrite a subtensor op with tensor::Cast arguments.
/// This essentially pushes the tensor.cast past its consuming subtensor when
/// `canFoldIntoConsumerOp` is true.
///
/// Example:
/// ```
///   %0 = tensor.cast %V : tensor<16x16xf32> to tensor<?x?xf32>
/// %1 = subtensor %0[0, 0][3, 4][1, 1] : tensor<?x?xf32> to tensor<3x4xf32>
/// ```
/// is rewritten into:
/// ```
/// %0 = subtensor %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to tensor<3x4xf32>
/// %1 = tensor.cast %0: tensor<3x4xf32> to tensor<3x4xf32>
/// ```
class SubTensorOpCastFolder final : public OpRewritePattern<SubTensorOp> {
public:
using OpRewritePattern<SubTensorOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SubTensorOp subTensorOp,
PatternRewriter &rewriter) const override {
// Any constant operand, just return to let SubViewOpConstantFolder kick in.
if (llvm::any_of(subTensorOp.getOperands(), [](Value operand) {
return matchPattern(operand, matchConstantIndex());
}))
return failure();
auto castOp = subTensorOp.source().getDefiningOp<tensor::CastOp>();
if (!castOp)
return failure();
if (!canFoldIntoConsumerOp(castOp))
return failure();
/// Deduce the type of the result to use for the canonicalized operation.
RankedTensorType resultType = getCanonicalSubTensorResultType(
subTensorOp.getType().getRank(), subTensorOp.getSourceType(),
subTensorOp.getMixedOffsets(), subTensorOp.getMixedSizes(),
subTensorOp.getMixedStrides());
Value newSubTensor = rewriter.create<SubTensorOp>(
subTensorOp.getLoc(), resultType, castOp.source(),
subTensorOp.offsets(), subTensorOp.sizes(), subTensorOp.strides(),
subTensorOp.static_offsets(), subTensorOp.static_sizes(),
subTensorOp.static_strides());
rewriter.replaceOpWithNewOp<tensor::CastOp>(
subTensorOp, subTensorOp.getType(), newSubTensor);
return success();
}
};
} // namespace
/// Return the canonical type of the result of a subtensor.
struct SubTensorReturnTypeCanonicalizer {
RankedTensorType operator()(SubTensorOp op,
ArrayRef<OpFoldResult> mixedOffsets,
ArrayRef<OpFoldResult> mixedSizes,
ArrayRef<OpFoldResult> mixedStrides) {
return getCanonicalSubTensorResultType(op.getType().getRank(),
op.getSourceType(), mixedOffsets,
mixedSizes, mixedStrides);
}
};
/// A canonicalizer wrapper to replace SubTensorOps.
struct SubTensorCanonicalizer {
void operator()(PatternRewriter &rewriter, SubTensorOp op,
SubTensorOp newOp) {
Value replacement = newOp.getResult();
if (replacement.getType() != op.getType())
replacement = rewriter.create<tensor::CastOp>(op.getLoc(), op.getType(),
replacement);
rewriter.replaceOp(op, replacement);
}
};
void SubTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
results.add<OpWithOffsetSizesAndStridesConstantArgumentFolder<
SubTensorOp, SubTensorReturnTypeCanonicalizer,
SubTensorCanonicalizer>,
SubTensorOpCastFolder>(context);
}
//
static LogicalResult
foldIdentityOffsetSizeAndStrideOpInterface(OffsetSizeAndStrideOpInterface op,
ShapedType shapedType) {
OpBuilder b(op.getContext());
for (OpFoldResult ofr : op.getMixedOffsets())
if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(0)))
return failure();
// Rank-reducing noops only need to inspect the leading dimensions: llvm::zip
// is appropriate.
auto shape = shapedType.getShape();
for (auto it : llvm::zip(op.getMixedSizes(), shape))
if (!isEqualConstantIntOrValue(std::get<0>(it),
b.getIndexAttr(std::get<1>(it))))
return failure();
for (OpFoldResult ofr : op.getMixedStrides())
if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(1)))
return failure();
return success();
}
OpFoldResult SubTensorOp::fold(ArrayRef<Attribute>) {
if (getSourceType() == getType() &&
succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
return this->source();
return OpFoldResult();
}
//===----------------------------------------------------------------------===//
// SubTensorInsertOp
//===----------------------------------------------------------------------===//
// Build a SubTensorInsertOp with mixed static and dynamic entries.
void mlir::SubTensorInsertOp::build(OpBuilder &b, OperationState &result,
Value source, Value dest,
ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes,
ArrayRef<OpFoldResult> strides,
ArrayRef<NamedAttribute> attrs) {
SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets,
ShapedType::kDynamicStrideOrOffset);
dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
ShapedType::kDynamicSize);
dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides,
ShapedType::kDynamicStrideOrOffset);
build(b, result, dest.getType(), source, dest, dynamicOffsets, dynamicSizes,
dynamicStrides, b.getI64ArrayAttr(staticOffsets),
b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides));
result.addAttributes(attrs);
}
// Build a SubTensorInsertOp with dynamic entries.
void mlir::SubTensorInsertOp::build(OpBuilder &b, OperationState &result,
Value source, Value dest,
ValueRange offsets, ValueRange sizes,
ValueRange strides,
ArrayRef<NamedAttribute> attrs) {
SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
build(b, result, source, dest, offsetValues, sizeValues, strideValues);
}
OpFoldResult SubTensorInsertOp::fold(ArrayRef<Attribute>) {
if (getSourceType().hasStaticShape() && getType().hasStaticShape() &&
getSourceType() == getType() &&
succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
return this->source();
return OpFoldResult();
}
namespace {
/// Pattern to rewrite a subtensor_insert op with constant arguments.
class SubTensorInsertOpConstantArgumentFolder final
: public OpRewritePattern<SubTensorInsertOp> {
public:
using OpRewritePattern<SubTensorInsertOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SubTensorInsertOp subTensorInsertOp,
PatternRewriter &rewriter) const override {
// No constant operand, just return.
if (llvm::none_of(subTensorInsertOp.getOperands(), [](Value operand) {
return matchPattern(operand, matchConstantIndex());
}))
return failure();
// At least one of offsets/sizes/strides is a new constant.
// Form the new list of operands and constant attributes from the
// existing.
SmallVector<OpFoldResult> mixedOffsets(subTensorInsertOp.getMixedOffsets());
SmallVector<OpFoldResult> mixedSizes(subTensorInsertOp.getMixedSizes());
SmallVector<OpFoldResult> mixedStrides(subTensorInsertOp.getMixedStrides());
canonicalizeSubViewPart(mixedOffsets, ShapedType::isDynamicStrideOrOffset);
canonicalizeSubViewPart(mixedSizes, ShapedType::isDynamic);
canonicalizeSubViewPart(mixedStrides, ShapedType::isDynamicStrideOrOffset);
// Create the new op in canonical form.
rewriter.replaceOpWithNewOp<SubTensorInsertOp>(
subTensorInsertOp, subTensorInsertOp.source(), subTensorInsertOp.dest(),
mixedOffsets, mixedSizes, mixedStrides);
return success();
}
};
/// Fold tensor_casts with subtensor_insert operations.
struct SubTensorInsertOpCastFolder final
: public OpRewritePattern<SubTensorInsertOp> {
using OpRewritePattern<SubTensorInsertOp>::OpRewritePattern;
LogicalResult matchAndRewrite(SubTensorInsertOp subTensorInsertOp,
PatternRewriter &rewriter) const override {
if (llvm::any_of(subTensorInsertOp.getOperands(), [](Value operand) {
return matchPattern(operand, matchConstantIndex());
}))
return failure();
auto getSourceOfCastOp = [](Value v) -> Optional<Value> {
auto castOp = v.getDefiningOp<tensor::CastOp>();
if (!castOp || !canFoldIntoConsumerOp(castOp))
return llvm::None;
return castOp.source();
};
Optional<Value> sourceCastSource =
getSourceOfCastOp(subTensorInsertOp.source());
Optional<Value> destCastSource =
getSourceOfCastOp(subTensorInsertOp.dest());
if (!sourceCastSource && !destCastSource)
return failure();
Value replacement = rewriter.create<SubTensorInsertOp>(
subTensorInsertOp.getLoc(),
(sourceCastSource ? *sourceCastSource : subTensorInsertOp.source()),
(destCastSource ? *destCastSource : subTensorInsertOp.dest()),
subTensorInsertOp.getMixedOffsets(), subTensorInsertOp.getMixedSizes(),
subTensorInsertOp.getMixedStrides());
if (replacement.getType() != subTensorInsertOp.getType()) {
replacement = rewriter.create<tensor::CastOp>(
subTensorInsertOp.getLoc(), subTensorInsertOp.getType(), replacement);
}
rewriter.replaceOp(subTensorInsertOp, replacement);
return success();
}
};
} // namespace
void SubTensorInsertOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
results.add<SubTensorInsertOpConstantArgumentFolder,
SubTensorInsertOpCastFolder>(context);
}
//===----------------------------------------------------------------------===//
// SwitchOp
//===----------------------------------------------------------------------===//

View File

@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Builders.h"
@ -25,7 +26,7 @@ using namespace mlir::tensor;
/// source tensor. This is useful to fold a tensor.cast into a consuming op and
/// implement canonicalization patterns for ops in different dialects that may
/// consume the results of tensor.cast operations. Such foldable tensor.cast
/// operations are typically inserted as `subtensor` ops and are canonicalized,
/// operations are typically inserted as `slice` ops and are canonicalized,
/// to preserve the type compatibility of their uses.
///
/// Returns true when all conditions are met:
@ -511,6 +512,530 @@ static LogicalResult verify(ReshapeOp op) {
return success();
}
//===----------------------------------------------------------------------===//
// ExtractSliceOp
//===----------------------------------------------------------------------===//
/// Helper function to dispatch an OpFoldResult into `dynamicVec` if it is a
/// Value, or into `staticVec` if it is an IntegerAttr.
/// In the case of a Value, a copy of the `sentinel` value is also pushed to
/// `staticVec`. This is useful to extract mixed static and dynamic entries that
/// come from an AttrSizedOperandSegments trait.
static void dispatchIndexOpFoldResult(OpFoldResult ofr,
SmallVectorImpl<Value> &dynamicVec,
SmallVectorImpl<int64_t> &staticVec,
int64_t sentinel) {
if (auto v = ofr.dyn_cast<Value>()) {
dynamicVec.push_back(v);
staticVec.push_back(sentinel);
return;
}
APInt apInt = ofr.dyn_cast<Attribute>().cast<IntegerAttr>().getValue();
staticVec.push_back(apInt.getSExtValue());
}
static void dispatchIndexOpFoldResults(ArrayRef<OpFoldResult> ofrs,
SmallVectorImpl<Value> &dynamicVec,
SmallVectorImpl<int64_t> &staticVec,
int64_t sentinel) {
for (auto ofr : ofrs)
dispatchIndexOpFoldResult(ofr, dynamicVec, staticVec, sentinel);
}
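
For illustration only (not part of the patch): the split performed by these helpers is what makes mixed forms of the op expressible at the IR level. Dynamic offsets/sizes travel as SSA operands, while the corresponding slots in the `static_offsets`/`static_sizes` attributes receive the `ShapedType` sentinels. A minimal sketch, with a made-up function name:

```mlir
// %off and %sz are dynamic: they become operands of the op, and their
// positions in the static_offsets/static_sizes attributes are filled with the
// dynamic sentinels instead of concrete values.
func @mixed_slice(%t: tensor<8x16xf32>, %off: index, %sz: index) -> tensor<4x?xf32> {
  %0 = tensor.extract_slice %t[0, %off] [4, %sz] [1, 1]
      : tensor<8x16xf32> to tensor<4x?xf32>
  return %0 : tensor<4x?xf32>
}
```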
/// An extract_slice op result type can be fully inferred from the source type
/// and the static representation of offsets, sizes and strides. Special
/// sentinels encode the dynamic case.
Type ExtractSliceOp::inferResultType(RankedTensorType sourceRankedTensorType,
ArrayRef<int64_t> leadingStaticOffsets,
ArrayRef<int64_t> leadingStaticSizes,
ArrayRef<int64_t> leadingStaticStrides) {
  // An extract_slice op may specify only a leading subset of offsets/sizes/
  // strides, in which case we complete with offset=0, sizes from the source
  // tensor type, and strides=1.
unsigned rank = sourceRankedTensorType.getRank();
assert(leadingStaticSizes.size() <= rank &&
"unexpected leadingStaticSizes overflow");
auto staticSizes = llvm::to_vector<4>(leadingStaticSizes);
unsigned numTrailingSizes = rank - staticSizes.size();
llvm::append_range(staticSizes, sourceRankedTensorType.getShape().take_back(
numTrailingSizes));
return RankedTensorType::get(staticSizes,
sourceRankedTensorType.getElementType());
}
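
A sketch of the completion rule described above (hypothetical example, not code from this patch): a builder passing only the leading offset/size/stride for a rank-3 source is, per that rule, equivalent to the fully spelled-out form with offset 0, the source sizes, and stride 1 for the trailing dimensions:

```mlir
// Only dim 0 has an explicit triple (offset 0, size 4, stride 1); dims 1 and 2
// are completed to offset 0, the full source sizes (32 and 8), and stride 1.
func @completed(%t: tensor<16x32x8xf32>) -> tensor<4x32x8xf32> {
  %0 = tensor.extract_slice %t[0, 0, 0] [4, 32, 8] [1, 1, 1]
      : tensor<16x32x8xf32> to tensor<4x32x8xf32>
  return %0 : tensor<4x32x8xf32>
}
```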
/// Extract int64_t values from the assumed ArrayAttr of IntegerAttr.
static SmallVector<int64_t, 4> extractFromI64ArrayAttr(Attribute attr) {
return llvm::to_vector<4>(
llvm::map_range(attr.cast<ArrayAttr>(), [](Attribute a) -> int64_t {
return a.cast<IntegerAttr>().getInt();
}));
}
Type ExtractSliceOp::inferResultType(
RankedTensorType sourceRankedTensorType,
ArrayRef<OpFoldResult> leadingStaticOffsets,
ArrayRef<OpFoldResult> leadingStaticSizes,
ArrayRef<OpFoldResult> leadingStaticStrides) {
SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
staticOffsets, ShapedType::kDynamicStrideOrOffset);
dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
ShapedType::kDynamicSize);
dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
staticStrides, ShapedType::kDynamicStrideOrOffset);
return ExtractSliceOp::inferResultType(sourceRankedTensorType, staticOffsets,
staticSizes, staticStrides);
}
/// An extract_slice op result type can be fully inferred from the source type
/// and the static representation of offsets, sizes and strides. Special
/// sentinels encode the dynamic case.
Type ExtractSliceOp::inferRankReducedResultType(
unsigned resultRank, RankedTensorType sourceRankedTensorType,
ArrayRef<int64_t> leadingStaticOffsets,
ArrayRef<int64_t> leadingStaticSizes,
ArrayRef<int64_t> leadingStaticStrides) {
auto inferredType =
inferResultType(sourceRankedTensorType, leadingStaticOffsets,
leadingStaticSizes, leadingStaticStrides)
.cast<RankedTensorType>();
int rankDiff = inferredType.getRank() - resultRank;
if (rankDiff > 0) {
auto shape = inferredType.getShape();
llvm::SmallDenseSet<unsigned> dimsToProject;
mlir::getPositionsOfShapeOne(rankDiff, shape, dimsToProject);
SmallVector<int64_t> projectedShape;
for (unsigned pos = 0, e = shape.size(); pos < e; ++pos)
if (!dimsToProject.contains(pos))
projectedShape.push_back(shape[pos]);
inferredType =
RankedTensorType::get(projectedShape, inferredType.getElementType());
}
return inferredType;
}
Type ExtractSliceOp::inferRankReducedResultType(
unsigned resultRank, RankedTensorType sourceRankedTensorType,
ArrayRef<OpFoldResult> leadingStaticOffsets,
ArrayRef<OpFoldResult> leadingStaticSizes,
ArrayRef<OpFoldResult> leadingStaticStrides) {
SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
dispatchIndexOpFoldResults(leadingStaticOffsets, dynamicOffsets,
staticOffsets, ShapedType::kDynamicStrideOrOffset);
dispatchIndexOpFoldResults(leadingStaticSizes, dynamicSizes, staticSizes,
ShapedType::kDynamicSize);
dispatchIndexOpFoldResults(leadingStaticStrides, dynamicStrides,
staticStrides, ShapedType::kDynamicStrideOrOffset);
return ExtractSliceOp::inferRankReducedResultType(
resultRank, sourceRankedTensorType, staticOffsets, staticSizes,
staticStrides);
}
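
A hedged IR-level sketch of what the rank-reducing inference enables (function name made up): when the requested result rank is lower than the inferred rank, the unit dimensions of the inferred type are projected away.

```mlir
// Sizes [1, 4, 1] infer tensor<1x4x1xf32>; requesting a rank-1 result keeps
// only the non-unit dimension, i.e. tensor<4xf32>.
func @rank_reducing(%t: tensor<8x16x4xf32>) -> tensor<4xf32> {
  %0 = tensor.extract_slice %t[3, 0, 2] [1, 4, 1] [1, 1, 1]
      : tensor<8x16x4xf32> to tensor<4xf32>
  return %0 : tensor<4xf32>
}
```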
/// Build an ExtractSliceOp with mixed static and dynamic entries and custom
/// result type. If the type passed is nullptr, it is inferred.
void ExtractSliceOp::build(OpBuilder &b, OperationState &result,
RankedTensorType resultType, Value source,
ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes,
ArrayRef<OpFoldResult> strides,
ArrayRef<NamedAttribute> attrs) {
SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets,
ShapedType::kDynamicStrideOrOffset);
dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
ShapedType::kDynamicSize);
dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides,
ShapedType::kDynamicStrideOrOffset);
auto sourceRankedTensorType = source.getType().cast<RankedTensorType>();
  // Structuring the implementation this way avoids duplication between builders.
if (!resultType) {
resultType =
ExtractSliceOp::inferResultType(sourceRankedTensorType, staticOffsets,
staticSizes, staticStrides)
.cast<RankedTensorType>();
}
build(b, result, resultType, source, dynamicOffsets, dynamicSizes,
dynamicStrides, b.getI64ArrayAttr(staticOffsets),
b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides));
result.addAttributes(attrs);
}
/// Build an ExtractSliceOp with mixed static and dynamic entries and inferred
/// result type.
void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes,
ArrayRef<OpFoldResult> strides,
ArrayRef<NamedAttribute> attrs) {
build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
}
/// Build an ExtractSliceOp with dynamic entries and custom result type. If the
/// type passed is nullptr, it is inferred.
void ExtractSliceOp::build(OpBuilder &b, OperationState &result,
RankedTensorType resultType, Value source,
ValueRange offsets, ValueRange sizes,
ValueRange strides, ArrayRef<NamedAttribute> attrs) {
SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
build(b, result, resultType, source, offsetValues, sizeValues, strideValues);
}
/// Build an ExtractSliceOp with dynamic entries and inferred result type.
void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
ValueRange offsets, ValueRange sizes,
ValueRange strides, ArrayRef<NamedAttribute> attrs) {
build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
}
enum SliceVerificationResult {
Success,
RankTooLarge,
SizeMismatch,
ElemTypeMismatch,
};
/// Checks if the `original` type can be rank reduced to the `reduced` type.
/// This function is a slight variant of the `is subsequence` algorithm, where
/// the non-matching dimensions must be 1.
static SliceVerificationResult
isRankReducedType(Type originalType, Type candidateReducedType,
std::string *errMsg = nullptr) {
if (originalType == candidateReducedType)
return SliceVerificationResult::Success;
if (!originalType.isa<RankedTensorType>())
return SliceVerificationResult::Success;
if (originalType.isa<RankedTensorType>() &&
!candidateReducedType.isa<RankedTensorType>())
return SliceVerificationResult::Success;
ShapedType originalShapedType = originalType.cast<ShapedType>();
ShapedType candidateReducedShapedType =
candidateReducedType.cast<ShapedType>();
// Rank and size logic is valid for all ShapedTypes.
ArrayRef<int64_t> originalShape = originalShapedType.getShape();
ArrayRef<int64_t> candidateReducedShape =
candidateReducedShapedType.getShape();
unsigned originalRank = originalShape.size(),
candidateReducedRank = candidateReducedShape.size();
if (candidateReducedRank > originalRank)
return SliceVerificationResult::RankTooLarge;
auto optionalUnusedDimsMask =
computeRankReductionMask(originalShape, candidateReducedShape);
// Sizes cannot be matched in case empty vector is returned.
if (!optionalUnusedDimsMask.hasValue())
return SliceVerificationResult::SizeMismatch;
if (originalShapedType.getElementType() !=
candidateReducedShapedType.getElementType())
return SliceVerificationResult::ElemTypeMismatch;
// We are done for the tensor case.
if (originalType.isa<RankedTensorType>())
return SliceVerificationResult::Success;
return SliceVerificationResult::Success;
}
template <typename OpTy>
static LogicalResult produceSliceErrorMsg(SliceVerificationResult result,
OpTy op, Type expectedType,
StringRef errMsg = "") {
  auto shapedType = expectedType.cast<ShapedType>();
switch (result) {
case SliceVerificationResult::Success:
return success();
case SliceVerificationResult::RankTooLarge:
return op.emitError("expected result rank to be smaller or equal to ")
<< "the source rank. " << errMsg;
case SliceVerificationResult::SizeMismatch:
return op.emitError("expected result type to be ")
<< expectedType
<< " or a rank-reduced version. (mismatch of result sizes) "
<< errMsg;
case SliceVerificationResult::ElemTypeMismatch:
return op.emitError("expected result element type to be ")
           << shapedType.getElementType() << errMsg;
}
llvm_unreachable("unexpected extract_slice op verification result");
}
/// Verifier for ExtractSliceOp.
static LogicalResult verify(ExtractSliceOp op) {
// Verify result type against inferred type.
auto expectedType = ExtractSliceOp::inferResultType(
op.getSourceType(), extractFromI64ArrayAttr(op.static_offsets()),
extractFromI64ArrayAttr(op.static_sizes()),
extractFromI64ArrayAttr(op.static_strides()));
auto result = isRankReducedType(expectedType, op.getType());
return produceSliceErrorMsg(result, op, expectedType);
}
/// Infer the canonical type of the result of an extract_slice op. Returns a
/// type with rank `resultRank` that is either the rank of the rank-reduced
/// type, or the non-rank-reduced type.
static RankedTensorType
getCanonicalSliceResultType(unsigned resultRank, RankedTensorType sourceType,
ArrayRef<OpFoldResult> mixedOffsets,
ArrayRef<OpFoldResult> mixedSizes,
ArrayRef<OpFoldResult> mixedStrides) {
auto resultType =
ExtractSliceOp::inferRankReducedResultType(
resultRank, sourceType, mixedOffsets, mixedSizes, mixedStrides)
.cast<RankedTensorType>();
if (resultType.getRank() != resultRank) {
resultType = ExtractSliceOp::inferResultType(sourceType, mixedOffsets,
mixedSizes, mixedStrides)
.cast<RankedTensorType>();
}
return resultType;
}
namespace {
/// Pattern to rewrite an extract_slice op with tensor::Cast arguments.
/// This essentially pushes the tensor.cast past its consuming slice when
/// `canFoldIntoConsumerOp` is true.
///
/// Example:
/// ```
/// %0 = tensor.cast %V : tensor<16x16xf32> to tensor<?x?xf32>
///   %1 = tensor.extract_slice %0[0, 0][3, 4][1, 1] : tensor<?x?xf32> to tensor<3x4xf32>
/// ```
/// is rewritten into:
/// ```
///   %0 = tensor.extract_slice %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to tensor<3x4xf32>
///   %1 = tensor.cast %0 : tensor<3x4xf32> to tensor<3x4xf32>
/// ```
class ExtractSliceOpCastFolder final : public OpRewritePattern<ExtractSliceOp> {
public:
using OpRewritePattern<ExtractSliceOp>::OpRewritePattern;
LogicalResult matchAndRewrite(ExtractSliceOp sliceOp,
PatternRewriter &rewriter) const override {
    // Any constant operand, just return to let the constant folder kick in.
if (llvm::any_of(sliceOp.getOperands(), [](Value operand) {
return matchPattern(operand, matchConstantIndex());
}))
return failure();
auto castOp = sliceOp.source().getDefiningOp<tensor::CastOp>();
if (!castOp)
return failure();
if (!canFoldIntoConsumerOp(castOp))
return failure();
/// Deduce the type of the result to use for the canonicalized operation.
RankedTensorType resultType = getCanonicalSliceResultType(
sliceOp.getType().getRank(), sliceOp.getSourceType(),
sliceOp.getMixedOffsets(), sliceOp.getMixedSizes(),
sliceOp.getMixedStrides());
Value newSlice = rewriter.create<ExtractSliceOp>(
sliceOp.getLoc(), resultType, castOp.source(), sliceOp.offsets(),
sliceOp.sizes(), sliceOp.strides(), sliceOp.static_offsets(),
sliceOp.static_sizes(), sliceOp.static_strides());
rewriter.replaceOpWithNewOp<tensor::CastOp>(sliceOp, sliceOp.getType(),
newSlice);
return success();
}
};
} // namespace
/// Return the canonical type of the result of an extract_slice op.
struct SliceReturnTypeCanonicalizer {
RankedTensorType operator()(ExtractSliceOp op,
ArrayRef<OpFoldResult> mixedOffsets,
ArrayRef<OpFoldResult> mixedSizes,
ArrayRef<OpFoldResult> mixedStrides) {
return getCanonicalSliceResultType(op.getType().getRank(),
op.getSourceType(), mixedOffsets,
mixedSizes, mixedStrides);
}
};
/// A canonicalizer wrapper to replace ExtractSliceOps.
struct SliceCanonicalizer {
void operator()(PatternRewriter &rewriter, ExtractSliceOp op,
ExtractSliceOp newOp) {
Value replacement = newOp.getResult();
if (replacement.getType() != op.getType())
replacement = rewriter.create<tensor::CastOp>(op.getLoc(), op.getType(),
replacement);
rewriter.replaceOp(op, replacement);
}
};
void ExtractSliceOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
results.add<
OpWithOffsetSizesAndStridesConstantArgumentFolder<
ExtractSliceOp, SliceReturnTypeCanonicalizer, SliceCanonicalizer>,
ExtractSliceOpCastFolder>(context);
}
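
To illustrate what these patterns do together (a rough before/after sketch, not a test from this patch): constant `index` operands are folded into the static attributes, and a `tensor.cast` is re-inserted when the canonical result type becomes more precise than the original one.

```mlir
func @canonicalize_me(%t: tensor<8x16xf32>) -> tensor<?x?xf32> {
  %c0 = constant 0 : index
  %c4 = constant 4 : index
  // Before canonicalization: constant offsets/sizes are SSA operands.
  %0 = tensor.extract_slice %t[%c0, %c0] [%c4, %c4] [1, 1]
      : tensor<8x16xf32> to tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
  // After -canonicalize (roughly):
  //   %0 = tensor.extract_slice %t[0, 0] [4, 4] [1, 1]
  //       : tensor<8x16xf32> to tensor<4x4xf32>
  //   %1 = tensor.cast %0 : tensor<4x4xf32> to tensor<?x?xf32>
}
```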
//
static LogicalResult
foldIdentityOffsetSizeAndStrideOpInterface(OffsetSizeAndStrideOpInterface op,
ShapedType shapedType) {
OpBuilder b(op.getContext());
for (OpFoldResult ofr : op.getMixedOffsets())
if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(0)))
return failure();
// Rank-reducing noops only need to inspect the leading dimensions: llvm::zip
// is appropriate.
auto shape = shapedType.getShape();
for (auto it : llvm::zip(op.getMixedSizes(), shape))
if (!isEqualConstantIntOrValue(std::get<0>(it),
b.getIndexAttr(std::get<1>(it))))
return failure();
for (OpFoldResult ofr : op.getMixedStrides())
if (!isEqualConstantIntOrValue(ofr, b.getIndexAttr(1)))
return failure();
return success();
}
OpFoldResult ExtractSliceOp::fold(ArrayRef<Attribute>) {
if (getSourceType() == getType() &&
succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
return this->source();
return OpFoldResult();
}
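
A sketch of this fold (function name made up): a slice with zero offsets, unit strides, and sizes matching the source shape, whose result type equals the source type, folds away.

```mlir
func @identity_slice(%t: tensor<4x8xf32>) -> tensor<4x8xf32> {
  // Folds to %t: offsets are 0, sizes match the source shape, strides are 1,
  // and the result type equals the source type.
  %0 = tensor.extract_slice %t[0, 0] [4, 8] [1, 1]
      : tensor<4x8xf32> to tensor<4x8xf32>
  return %0 : tensor<4x8xf32>
}
```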
//===----------------------------------------------------------------------===//
// InsertSliceOp
//===----------------------------------------------------------------------===//
// Build an InsertSliceOp with mixed static and dynamic entries.
void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source,
Value dest, ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes,
ArrayRef<OpFoldResult> strides,
ArrayRef<NamedAttribute> attrs) {
SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets,
ShapedType::kDynamicStrideOrOffset);
dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes,
ShapedType::kDynamicSize);
dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides,
ShapedType::kDynamicStrideOrOffset);
build(b, result, dest.getType(), source, dest, dynamicOffsets, dynamicSizes,
dynamicStrides, b.getI64ArrayAttr(staticOffsets),
b.getI64ArrayAttr(staticSizes), b.getI64ArrayAttr(staticStrides));
result.addAttributes(attrs);
}
// Build an InsertSliceOp with dynamic entries.
void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source,
Value dest, ValueRange offsets, ValueRange sizes,
ValueRange strides, ArrayRef<NamedAttribute> attrs) {
SmallVector<OpFoldResult> offsetValues = llvm::to_vector<4>(
llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> sizeValues = llvm::to_vector<4>(
llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
SmallVector<OpFoldResult> strideValues = llvm::to_vector<4>(
llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
build(b, result, source, dest, offsetValues, sizeValues, strideValues);
}
OpFoldResult InsertSliceOp::fold(ArrayRef<Attribute>) {
if (getSourceType().hasStaticShape() && getType().hasStaticShape() &&
getSourceType() == getType() &&
succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
return this->source();
return OpFoldResult();
}
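
Similarly for the insert_slice fold (a sketch; note the extra requirement that both types be statically shaped and identical): inserting a whole-tensor slice over a destination of the same static type is a no-op whose result is the inserted source.

```mlir
func @identity_insert(%src: tensor<4x8xf32>, %dst: tensor<4x8xf32>) -> tensor<4x8xf32> {
  // The insertion covers all of %dst, so the op folds to %src.
  %0 = tensor.insert_slice %src into %dst[0, 0] [4, 8] [1, 1]
      : tensor<4x8xf32> into tensor<4x8xf32>
  return %0 : tensor<4x8xf32>
}
```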
namespace {
/// Pattern to rewrite an insert_slice op with constant arguments.
class InsertSliceOpConstantArgumentFolder final
: public OpRewritePattern<InsertSliceOp> {
public:
using OpRewritePattern<InsertSliceOp>::OpRewritePattern;
LogicalResult matchAndRewrite(InsertSliceOp insertSliceOp,
PatternRewriter &rewriter) const override {
// No constant operand, just return.
if (llvm::none_of(insertSliceOp.getOperands(), [](Value operand) {
return matchPattern(operand, matchConstantIndex());
}))
return failure();
// At least one of offsets/sizes/strides is a new constant.
// Form the new list of operands and constant attributes from the
// existing.
SmallVector<OpFoldResult> mixedOffsets(insertSliceOp.getMixedOffsets());
SmallVector<OpFoldResult> mixedSizes(insertSliceOp.getMixedSizes());
SmallVector<OpFoldResult> mixedStrides(insertSliceOp.getMixedStrides());
canonicalizeSubViewPart(mixedOffsets, ShapedType::isDynamicStrideOrOffset);
canonicalizeSubViewPart(mixedSizes, ShapedType::isDynamic);
canonicalizeSubViewPart(mixedStrides, ShapedType::isDynamicStrideOrOffset);
// Create the new op in canonical form.
rewriter.replaceOpWithNewOp<InsertSliceOp>(
insertSliceOp, insertSliceOp.source(), insertSliceOp.dest(),
mixedOffsets, mixedSizes, mixedStrides);
return success();
}
};
/// Fold tensor_casts with insert_slice operations.
struct InsertSliceOpCastFolder final : public OpRewritePattern<InsertSliceOp> {
using OpRewritePattern<InsertSliceOp>::OpRewritePattern;
LogicalResult matchAndRewrite(InsertSliceOp insertSliceOp,
PatternRewriter &rewriter) const override {
if (llvm::any_of(insertSliceOp.getOperands(), [](Value operand) {
return matchPattern(operand, matchConstantIndex());
}))
return failure();
auto getSourceOfCastOp = [](Value v) -> Optional<Value> {
auto castOp = v.getDefiningOp<tensor::CastOp>();
if (!castOp || !canFoldIntoConsumerOp(castOp))
return llvm::None;
return castOp.source();
};
Optional<Value> sourceCastSource =
getSourceOfCastOp(insertSliceOp.source());
Optional<Value> destCastSource = getSourceOfCastOp(insertSliceOp.dest());
if (!sourceCastSource && !destCastSource)
return failure();
Value replacement = rewriter.create<InsertSliceOp>(
insertSliceOp.getLoc(),
(sourceCastSource ? *sourceCastSource : insertSliceOp.source()),
(destCastSource ? *destCastSource : insertSliceOp.dest()),
insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
insertSliceOp.getMixedStrides());
if (replacement.getType() != insertSliceOp.getType()) {
replacement = rewriter.create<tensor::CastOp>(
insertSliceOp.getLoc(), insertSliceOp.getType(), replacement);
}
rewriter.replaceOp(insertSliceOp, replacement);
return success();
}
};
} // namespace
void InsertSliceOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
results.add<InsertSliceOpConstantArgumentFolder, InsertSliceOpCastFolder>(
context);
}
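
For illustration (a rough before/after, analogous to the extract_slice patterns above): a foldable `tensor.cast` on the source or destination is absorbed into the insert_slice, with a cast re-inserted on the result when its type changes.

```mlir
func @absorb_cast(%src: tensor<4xf32>, %dst: tensor<32xf32>) -> tensor<?xf32> {
  // Before: inserting into the casted, less precise destination.
  %0 = tensor.cast %dst : tensor<32xf32> to tensor<?xf32>
  %1 = tensor.insert_slice %src into %0[0] [4] [1]
      : tensor<4xf32> into tensor<?xf32>
  return %1 : tensor<?xf32>
  // After canonicalization (roughly): the cast moves past the insert.
  //   %1 = tensor.insert_slice %src into %dst[0] [4] [1]
  //       : tensor<4xf32> into tensor<32xf32>
  //   %2 = tensor.cast %1 : tensor<32xf32> to tensor<?xf32>
}
```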
//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//

View File

@ -616,9 +616,9 @@ func @split_at(%shape: tensor<?xindex>, %index: index) -> (tensor<?xindex>, tens
// CHECK-NEXT: %[[ISNEG:.*]] = cmpi slt, %[[INDEX]], %[[C0]] : index
// CHECK-NEXT: %[[SELECT:.*]] = select %[[ISNEG]], %[[POSINDEX]], %[[INDEX]] : index
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK-NEXT: %[[HEAD:.*]] = subtensor %[[SHAPE]][%[[C0]]] [%[[SELECT]]] [%[[C1]]] : tensor<?xindex> to tensor<?xindex>
// CHECK-NEXT: %[[HEAD:.*]] = tensor.extract_slice %[[SHAPE]][%[[C0]]] [%[[SELECT]]] [%[[C1]]] : tensor<?xindex> to tensor<?xindex>
// CHECK-NEXT: %[[TAIL_SIZE:.*]] = subi %[[RANK]], %[[SELECT]] : index
// CHECK-NEXT: %[[TAIL:.*]] = subtensor %[[SHAPE]][%[[SELECT]]] [%[[TAIL_SIZE]]] [%[[C1]]] : tensor<?xindex> to tensor<?xindex>
// CHECK-NEXT: %[[TAIL:.*]] = tensor.extract_slice %[[SHAPE]][%[[SELECT]]] [%[[TAIL_SIZE]]] [%[[C1]]] : tensor<?xindex> to tensor<?xindex>
// CHECK-NEXT: return %[[HEAD]], %[[TAIL]] : tensor<?xindex>, tensor<?xindex>
%head, %tail = "shape.split_at"(%shape, %index) : (tensor<?xindex>, index) -> (tensor<?xindex>, tensor<?xindex>)
return %head, %tail : tensor<?xindex>, tensor<?xindex>

View File

@ -679,10 +679,10 @@ func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
// CHECK: [[CST:%.+]] = constant 0.0
// CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST]])
// CHECK: [[ARG0_DIM0:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM0]]
// CHECK: [[ARG1_DIM0:%.+]] = memref.dim %arg1, [[AXIS]]
// CHECK: [[INSERT1:%.+]] = subtensor_insert %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg1 into [[INSERT0]]{{\[}}[[NEW_OFFSET]], [[OFFSET]]] {{\[}}[[ARG1_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
%0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>) -> (tensor<11x1xf32>)
// CHECK: [[AXIS:%.+]] = constant 1
@ -698,10 +698,10 @@ func @concat(%arg0: tensor<5x1xf32>, %arg1: tensor<6x1xf32>) -> () {
// CHECK: [[CST:%.+]] = constant 0.0
// CHECK: [[FILL:%.+]] = linalg.fill([[INIT]], [[CST]])
// CHECK: [[ARG0_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[INSERT0:%.+]] = subtensor_insert %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[INSERT0:%.+]] = tensor.insert_slice %arg0 into [[FILL]]{{\[}}[[OFFSET]], [[OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG0_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[NEW_OFFSET:%.+]] = addi [[OFFSET]], [[ARG0_DIM1]]
// CHECK: [[ARG1_DIM1:%.+]] = memref.dim %arg0, [[AXIS]]
// CHECK: [[INSERT1:%.+]] = subtensor_insert %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
// CHECK: [[INSERT1:%.+]] = tensor.insert_slice %arg0 into [[INSERT0]]{{\[}}[[OFFSET]], [[NEW_OFFSET]]] {{\[}}[[ARG0_DIM0]], [[ARG1_DIM1]]] {{\[}}[[STRIDE]], [[STRIDE]]]
%1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>) -> (tensor<5x2xf32>)
return
}

View File

@ -12,7 +12,7 @@ func @const_test() -> (tensor<i32>) {
// -----
func @slice(%arg0: tensor<6xf32>) ->() {
// CHECK: [[SLICE:%.+]] = subtensor %arg0[2] [1] [1]
// CHECK: [[SLICE:%.+]] = tensor.extract_slice %arg0[2] [1] [1]
%0 = "tosa.slice"(%arg0) {start = [2], size = [1]} : (tensor<6xf32>) -> (tensor<1xf32>)
return
}

View File

@ -166,9 +166,9 @@ func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>,
func private @make_index() -> index
// CHECK-LABEL: func @bufferize_subtensor(
// CHECK-LABEL: func @bufferize_slice(
// CHECK-SAME: %[[T:[0-9a-z]*]]: tensor<?x?xf32>
func @bufferize_subtensor(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?xf32>) {
func @bufferize_slice(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?xf32>) {
// CHECK: %[[IDX:.*]] = call @make_index() : () -> index
%i0 = call @make_index() : () -> index
@ -178,14 +178,14 @@ func @bufferize_subtensor(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?x
// CHECK-SAME: memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
// CHECK-NEXT: linalg.copy(%[[SM0]], %[[A0]]) : memref<2x3xf32, #[[$MAP0]]>, memref<2x3xf32>
// CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[A0]] : memref<2x3xf32>
%st0 = subtensor %t[0, 0][2, 3][1, 1] : tensor<?x?xf32> to tensor<2x3xf32>
%st0 = tensor.extract_slice %t[0, 0][2, 3][1, 1] : tensor<?x?xf32> to tensor<2x3xf32>
// CHECK-NEXT: %[[A1:.*]] = memref.alloc(%[[IDX]]) : memref<2x?xf32>
// CHECK-NEXT: %[[SM1:.*]] = memref.subview %[[M]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
// CHECK-SAME: memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
// CHECK-NEXT: linalg.copy(%[[SM1]], %[[A1]]) : memref<2x?xf32, #[[$MAP1]]>, memref<2x?xf32>
// CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[A1]] : memref<2x?xf32>
%st1 = subtensor %t[0, %i0][2, %i0][1, 2] : tensor<?x?xf32> to tensor<2x?xf32>
%st1 = tensor.extract_slice %t[0, %i0][2, %i0][1, 2] : tensor<?x?xf32> to tensor<2x?xf32>
// CHECK-NEXT: return %[[RT0]], %[[RT1]]
return %st0, %st1 : tensor<2x3xf32>, tensor<2x?xf32>
@ -198,11 +198,11 @@ func @bufferize_subtensor(%t : tensor<?x?xf32>) -> (tensor<2x3xf32>, tensor<2x?x
func private @make_index() -> index
// CHECK-LABEL: func @bufferize_subtensor_insert(
// CHECK-LABEL: func @bufferize_insert_slice(
// CHECK-SAME: %[[T:[0-9a-z]*]]: tensor<?x?xf32>
// CHECK-SAME: %[[ST0:[0-9a-z]*]]: tensor<2x3xf32>
// CHECK-SAME: %[[ST1:[0-9a-z]*]]: tensor<2x?xf32>
func @bufferize_subtensor_insert(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1 : tensor<2x?xf32>) ->
func @bufferize_insert_slice(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %st1 : tensor<2x?xf32>) ->
(tensor<?x?xf32>, tensor<?x?xf32>) {
%c0 = constant 0 : index
%c1 = constant 1 : index
@ -222,7 +222,7 @@ func @bufferize_subtensor_insert(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %
// CHECK-SAME: memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
// CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]>
// CHECK-NEXT: %[[RT0:.*]] = memref.tensor_load %[[M_COPY0]] : memref<?x?xf32>
%t0 = subtensor_insert %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>
%t0 = tensor.insert_slice %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>
// CHECK-DAG: %[[SM1:.*]] = memref.buffer_cast %[[ST1]] : memref<2x?xf32>
// CHECK-NEXT: %[[M_COPY1:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
@ -231,7 +231,7 @@ func @bufferize_subtensor_insert(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %
// CHECK-SAME: memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
// CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]>
// CHECK-NEXT: %[[RT1:.*]] = memref.tensor_load %[[M_COPY1]] : memref<?x?xf32>
%t1 = subtensor_insert %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor<?x?xf32>
%t1 = tensor.insert_slice %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor<?x?xf32>
// CHECK: return %[[RT0]], %[[RT1]]
return %t0, %t1: tensor<?x?xf32>, tensor<?x?xf32>

View File

@ -648,15 +648,15 @@ func @keep_not_noop(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>)
// -----
func @fold_init_tensor_with_subtensor
func @fold_init_tensor_with_slice
(%arg0 : index, %arg1 : index) -> tensor<5x?x20xf32>
{
%0 = linalg.init_tensor[%arg0, 10, 40] : tensor<?x10x40xf32>
%1 = subtensor %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
%1 = tensor.extract_slice %0[0, 0, 0] [5, %arg1, 20] [1, 1, 1]
: tensor<?x10x40xf32> to tensor<5x?x20xf32>
return %1 : tensor<5x?x20xf32>
}
// CHECK: func @fold_init_tensor_with_subtensor
// CHECK: func @fold_init_tensor_with_slice
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index
// CHECK: %[[T0:.+]] = linalg.init_tensor [5, %[[ARG1]], 20]
@ -723,13 +723,13 @@ func @propogate_casts(%arg0 : tensor<?x?xf32>, %arg1 : f32, %arg2 : index,
%1 = linalg.fill(%0, %arg1) : tensor<?x?xf32>, f32 -> tensor<?x?xf32>
%2 = memref.dim %arg0, %c0 : tensor<?x?xf32>
%3 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%4 = subtensor_insert %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
return %4 : tensor<?x?xf32>
}
// CHECK-LABEL: func @propogate_casts
// CHECK: %[[INIT:.+]] = linalg.init_tensor [21, 42]
// CHECK: %[[FILL:.+]] = linalg.fill(%[[INIT]], %{{.+}})
// CHECK: %[[INSERTED:.+]] = subtensor_insert %{{.+}} into %[[FILL]]
// CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
// CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
// CHECK: return %[[RESULT]]

View File

@ -6,43 +6,43 @@
// -----
// CHECK-LABEL: func @subtensor_fun
func @subtensor_fun(%A : tensor<?xf32>, %B : tensor<?xf32> {linalg.inplaceable = true})
// CHECK-LABEL: func @extract_slice_fun
func @extract_slice_fun(%A : tensor<?xf32>, %B : tensor<?xf32> {linalg.inplaceable = true})
-> (tensor<4xf32>, tensor<8xf32>)
{
// subtensor is not used in a write, it is not compelled to bufferize out of
// place. Let callers decide whether they want to create aliasing subviews at
// all call sites or whether they allocate.
// tensor.extract_slice is not used in a write, it is not compelled to
// bufferize out of place. Let callers decide whether they want to create
// aliasing subviews at all call sites or whether they allocate.
// This is true irrespective of whether the function argument is inplaceable.
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%r1 = subtensor %B[0][8][1] : tensor<?xf32> to tensor<8xf32>
%r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>
return %r0, %r1: tensor<4xf32>, tensor<8xf32>
}
// -----
// CHECK-LABEL: func @subtensor_insert_fun
func @subtensor_insert_fun(
// CHECK-LABEL: func @insert_slice_fun
func @insert_slice_fun(
%A : tensor<?xf32>,
%B : tensor<?xf32> {linalg.inplaceable = true},
%C : tensor<4xf32>)
-> (tensor<?xf32>, tensor<?xf32>)
{
// must bufferize out of place.
// CHECK: subtensor_insert
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
%r0 = subtensor_insert %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
// bufferizes inplace.
// CHECK: subtensor_insert
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%r1 = subtensor_insert %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
return %r0, %r1: tensor<?xf32>, tensor<?xf32>
}
@ -85,34 +85,34 @@ func @conflict_on_B(
// -----
// CHECK-LABEL: func @subtensor_subtensor
func @subtensor_subtensor(
// CHECK-LABEL: func @extract_slice_extract_slice
func @extract_slice_extract_slice(
%A : tensor<?xf32> {linalg.inplaceable = true}, %B : tensor<?xf32>)
-> (tensor<2xf32>, tensor<2xf32>)
{
// subtensor is not used in a write, it is not compelled to bufferize out of
// place. Let callers decide whether they want to create aliasing subviews at
// all call sites or whether they allocate.
// tensor.extract_slice is not used in a write, it is not compelled to
// bufferize out of place. Let callers decide whether they want to create
// aliasing subviews at all call sites or whether they allocate.
// This is true irrespective of whether the function argument is inplaceable.
// CHECK: {__inplace_results_attr__ = ["true"]}
%r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: {__inplace_results_attr__ = ["true"]}
%r1 = subtensor %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>
%r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>
// CHECK: {__inplace_results_attr__ = ["true"]}
%r2 = subtensor %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
%r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: {__inplace_results_attr__ = ["true"]}
%r3 = subtensor %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>
%r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>
return %r1, %r3: tensor<2xf32>, tensor<2xf32>
}
// -----
// CHECK-LABEL: func @subtensor_insert_subtensor_insert
func @subtensor_insert_subtensor_insert(
// CHECK-LABEL: func @insert_slice_insert_slice
func @insert_slice_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%A2 : tensor<4xf32> {linalg.inplaceable = true},
%A3 : tensor<2xf32> {linalg.inplaceable = true},
@ -120,102 +120,106 @@ func @subtensor_insert_subtensor_insert(
-> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK: {__inplace_results_attr__ = ["true"]}
%r0 = subtensor_insert %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>
%r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>
// CHECK: {__inplace_results_attr__ = ["true"]}
%r1 = subtensor_insert %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
%r2 = subtensor_insert %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>
%r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
%r3 = subtensor_insert %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}
// -----
// CHECK-LABEL: func @subtensor_nonmatching_subtensor_insert
func @subtensor_nonmatching_subtensor_insert(
// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
func @extract_slice_nonmatching_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32>, %idx: index)
-> (tensor<?xf32>, tensor<?xf32>)
{
// %r1 bufferizes inplace because %A is inplaceable.
// %r0 is an overlapping subtensor that does not match, it must be out of place.
// CHECK: subtensor
// %r0 is an overlapping tensor.extract_slice that does not match, it must be
// out of place.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
%r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// %r1 can bufferize inplace fine.
// CHECK: subtensor_insert
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%r1 = subtensor_insert %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
%r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
  // %r3 does not bufferize inplace because %B is not inplaceable.
// %r0 is an overlapping subtensor that does not match, but does not alias with
// the buffer coming from %r3 so it can actually bufferize inplace.
// CHECK: subtensor
// %r0 is an overlapping tensor.extract_slice that does not match, but does
// not alias with the buffer coming from %r3 so it can actually bufferize
// inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%r2 = subtensor %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
%r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
// %r3 cannot bufferize inplace since %B is not inplaceable.
// CHECK: subtensor_insert
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
%r3 = subtensor_insert %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
%r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}
// -----
// CHECK-LABEL: func @subtensor_matching_subtensor_insert
func @subtensor_matching_subtensor_insert(
// CHECK-LABEL: func @extract_slice_matching_insert_slice
func @extract_slice_matching_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32>)
-> (tensor<?xf32>, tensor<?xf32>)
{
// %r1 bufferizes inplace because %A is inplaceable.
// %r0 is a subtensor that matches, it can also be bufferized inplace.
// CHECK: subtensor
// %r0 is a tensor.extract_slice that matches, it can also be bufferized
// inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: subtensor_insert
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%r1 = subtensor_insert %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
// %r2 is a subtensor that matches %r3, it can be bufferized inplace.
// CHECK: subtensor
// %r2 is a tensor.extract_slice that matches %r3, it can be bufferized
// inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%r2 = subtensor %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
%r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
// subtensor_insert cannot bufferize inplace.
// tensor.insert_slice cannot bufferize inplace.
// This should have been captured by a canonicalization pattern and it would
// be unproductive to have special logic in bufferization to encode matching
// subtensor_insert(subtensor(A), A).
// CHECK: subtensor_insert
// insert_slice(extract_slice(A), A).
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
%r3 = subtensor_insert %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}
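
// Illustrative sketch (not part of this commit, function name is made up) of
// the matching round-trip mentioned above, which a canonicalization pattern is
// expected to fold away entirely so that bufferization never sees it:
func @fold_matching_round_trip_sketch(%A : tensor<?xf32>) -> tensor<?xf32> {
  %s = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
  // Same offsets, sizes and strides, inserted back into %A: expected to fold
  // to plain %A.
  %r = tensor.insert_slice %s into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
  return %r : tensor<?xf32>
}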
// -----
// CHECK-LABEL: func @subtensor_linalg_readonly_use
func @subtensor_linalg_readonly_use(
// CHECK-LABEL: func @extract_slice_linalg_readonly_use
func @extract_slice_linalg_readonly_use(
%A : tensor<?x?xf32>,
%B : tensor<4x4xf32>,
%C : tensor<4x4xf32> {linalg.inplaceable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// subtensor is only used as a read, no interference irrespective of user's
// inplace status.
// CHECK: subtensor
// tensor.extract_slice is only used as a read, no interference irrespective
// of user's inplace status.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%sA = subtensor %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// matmul output operand is not inplaceable at the function boundary.
// CHECK: linalg.matmul
@ -236,8 +240,8 @@ func @subtensor_linalg_readonly_use(
// -----
// CHECK-LABEL: func @subtensor_to_linalg_write_use
func @subtensor_to_linalg_write_use(
// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func @extract_slice_to_linalg_write_use(
%A : tensor<4x4xf32>,
%B : tensor<?x?xf32>,
%C : tensor<?x?xf32> {linalg.inplaceable = true})
@ -245,9 +249,9 @@ func @subtensor_to_linalg_write_use(
{
// Step 3. %sB forward propagates to a write in %D but it is not inplace.
// So this is only ever read and can bufferize inplace.
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%sB = subtensor %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// Step 2. %sB has a read interference in %E, it does not bufferize inplace.
// CHECK: linalg.matmul
@ -259,12 +263,12 @@ func @subtensor_to_linalg_write_use(
// Step 4. %sC forward propagates to an inplace write in %E.
// %sC backward propagates to %C which is inplaceable.
// As a consequence this is bufferized inplace.
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%sC = subtensor %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// Step 1. %sC backprops to the subtensor producer which is not considered an
// interference. This bufferizes inplace.
// Step 1. %sC backprops to the tensor.extract_slice producer which is not
// considered an interference. This bufferizes inplace.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
@ -280,8 +284,8 @@ func @subtensor_to_linalg_write_use(
// -----
// CHECK-LABEL: func @subtensor_to_linalg_write_use
func @subtensor_to_linalg_write_use(
// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func @extract_slice_to_linalg_write_use(
%A : tensor<4x4xf32>,
%B : tensor<?x?xf32>,
%C : tensor<?x?xf32> {linalg.inplaceable = true})
@ -290,12 +294,12 @@ func @subtensor_to_linalg_write_use(
// Step 4. %sB forward propagates to an inplace write in %D.
// %sB backward propagates to %B which is not inplaceable.
// As a consequence this is bufferized out of place.
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
%sB = subtensor %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// Step 1. %sB backprops to the subtensor producer which is not considered an
// interference. This bufferizes inplace.
// Step 1. %sB backprops to the tensor.extract_slice producer which is not
// considered an interference. This bufferizes inplace.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
@ -305,12 +309,12 @@ func @subtensor_to_linalg_write_use(
// Step 3. %sC forward propagates to an inplace write in %E.
// %sC backward propagates to %C which is inplaceable.
// As a consequence this is bufferized inplace.
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%sC = subtensor %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// Step 1. %sC backprops to the subtensor producer which is not considered an
// interference. This bufferizes inplace.
// Step 1. %sC backprops to the tensor.extract_slice producer which is not
// considered an interference. This bufferizes inplace.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
@ -322,8 +326,8 @@ func @subtensor_to_linalg_write_use(
// -----
// CHECK-LABEL: func @nested_subtensor_and_insert
func @nested_subtensor_and_insert(
// CHECK-LABEL: func @nested_extract_slice_and_insert
func @nested_extract_slice_and_insert(
%A : tensor<?x?xf32>,
%B : tensor<?x?xf32> {linalg.inplaceable = true},
%C : tensor<?x?xf32> {linalg.inplaceable = true},
@ -332,75 +336,78 @@ func @nested_subtensor_and_insert(
{
%f0 = constant 0.0 : f32
// 2-level matching subtensor / subtensor_insert into non inplaceable %A.
// 2-level matching tensor.extract_slice / tensor.insert_slice into non
// inplaceable %A.
// - %rA is not inplaceable because %A is not inplaceable at function boundary.
// - once %rA is deemed not inplaceable, nothing prevents %rsA from being inplaceable
// - this propagates to %FA and %ssA being inplaceable.
// - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
// inplaceable and so %sA is not inplaceable.
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
// CHECK-NEXT: subtensor
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: fill
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: subtensor_insert
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: subtensor_insert
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
%sA = subtensor %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssA = subtensor %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%FA = linalg.fill(%ssA, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
%rsA = subtensor_insert %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
%rA = subtensor_insert %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
%rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// 3-level matching subtensor / subtensor_insert into inplaceable %B.
// CHECK-NEXT: subtensor
// 3-level matching tensor.extract_slice / tensor.insert_slice into
// inplaceable %B.
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: subtensor
// Atm, this 2nd subtensor fails to bufferize inplace because clobbering
// analysis conservatively test for equivalent buffers.
// CHECK-NEXT: tensor.extract_slice
// At the moment, this 2nd tensor.extract_slice fails to bufferize inplace
// because the clobbering analysis conservatively tests for equivalent buffers.
// TODO: This is currently too restrictive and misses clobberings.
// When available, use container-containee analysis.
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
// CHECK-NEXT: subtensor
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: fill
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: subtensor_insert
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: subtensor_insert
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: subtensor_insert
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%sB = subtensor %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssB = subtensor %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
%sssB = subtensor %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
%sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
%sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
%FB = linalg.fill(%sssB, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
%rssB = subtensor_insert %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
%rsB = subtensor_insert %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
%rB = subtensor_insert %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
%rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
%rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// 2-level matching subtensor / subtensor_insert into inplaceable %C with a twist.
// 2-level matching tensor.extract_slice / tensor.insert_slice into
// inplaceable %C with a twist.
// Throw a wrench in the system: %rsC production sizes do not match %ssC.
// CHECK-NEXT: subtensor
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// The subtensor_insert that would be candidate for matching does not actually
// match. That subtensor_insert can still be bufferized inplace nonetheless
// but this subtensor, which bufferizes to an inplace write, cannot.
// CHECK-NEXT: subtensor
// The tensor.insert_slice that would be a candidate for matching does not
// actually match. That tensor.insert_slice can still be bufferized inplace
// nonetheless, but this tensor.extract_slice, which bufferizes to an inplace
// write, cannot.
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_results_attr__ = ["false"]}
// CHECK-NEXT: fill
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: subtensor_insert
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
// CHECK-NEXT: subtensor_insert
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_results_attr__ = ["true"]}
%sC = subtensor %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssC = subtensor %sC[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssC = tensor.extract_slice %sC[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%FC = linalg.fill(%ssC, %f0) : tensor<4x4xf32>, f32 -> tensor<4x4xf32>
%rsC = subtensor_insert %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
%rC = subtensor_insert %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%rsC = tensor.insert_slice %FC into %sC[0, 0][12345, 67890][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
%rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
}
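
// For the analysis above, a tensor.insert_slice "matches" a tensor.extract_slice
// when it writes the slice back into the tensor it was extracted from, at the
// same offsets, sizes and strides. Minimal sketch (hypothetical function, not
// part of this commit):
func @matching_pair_sketch(%T : tensor<?x?xf32> {linalg.inplaceable = true},
                           %idx : index) -> tensor<?x?xf32> {
  %s = tensor.extract_slice %T[0, 0] [%idx, %idx] [1, 1]
      : tensor<?x?xf32> to tensor<?x?xf32>
  %r = tensor.insert_slice %s into %T[0, 0] [%idx, %idx] [1, 1]
      : tensor<?x?xf32> into tensor<?x?xf32>
  return %r : tensor<?x?xf32>
}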


@ -118,8 +118,8 @@ func @vec_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %vec : vec
// -----
// CHECK-LABEL: func @subtensor_insert_fun
func @subtensor_insert_fun(%A0 : tensor<?xf32>, %A1 : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-LABEL: func @insert_slice_fun
func @insert_slice_fun(%A0 : tensor<?xf32>, %A1 : tensor<?xf32> {linalg.inplaceable = true},
%t0 : tensor<4xf32>, %t1 : tensor<4xf32> {linalg.inplaceable = true})
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
{
@ -128,40 +128,40 @@ func @subtensor_insert_fun(%A0 : tensor<?xf32>, %A1 : tensor<?xf32> {linalg.inpl
// CHECK: %[[BUFFER_CAST_t0:.*]] = memref.buffer_cast {{.*}} : memref<4xf32
// CHECK: %[[BUFFER_CAST_t1:.*]] = memref.buffer_cast {{.*}} : memref<4xf32
// Alloc and copy the whole result tensor. Copy the subtensor.
// Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
// CHECK: %[[REALLOC_A0:.*]] = memref.alloc
// CHECK: linalg.copy(%[[BUFFER_CAST_A0]]
// CHECK: %[[SV_A0:.*]] = memref.subview %[[REALLOC_A0]]
// CHECK: linalg.copy(%[[BUFFER_CAST_t0]], %[[SV_A0]])
%r0 = subtensor_insert %t0 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r0 = tensor.insert_slice %t0 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
// Alloc and copy the whole result tensor. Copy the subtensor.
// Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
// CHECK: %[[REALLOC_A0_2:.*]] = memref.alloc
// CHECK: linalg.copy(%[[BUFFER_CAST_A0]]
// CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC_A0_2]]
// CHECK: linalg.copy(%[[BUFFER_CAST_t1]], %[[SV_A0_2]])
%r1 = subtensor_insert %t1 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
// Still alloc the large tensor because %A1 is read after. Copy the subtensor.
// Still alloc the large tensor because %A1 is read after. Copy the tensor.extract_slice.
// CHECK: %[[REALLOC_A1:.*]] = memref.alloc
// CHECK: linalg.copy(%[[BUFFER_CAST_A1]]
// CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC_A1]]
// CHECK: linalg.copy(%[[BUFFER_CAST_t0]], %[[SV_A1]])
%r2 = subtensor_insert %t0 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
// Do not realloc the large tensor. Copy the subtensor.
// Do not realloc the large tensor. Copy the tensor.extract_slice.
// CHECK-NOT: alloc
// CHECK: %[[SV_A1_2:.*]] = memref.subview %[[BUFFER_CAST_A1]]
// CHECK: linalg.copy(%[[BUFFER_CAST_t1]], %[[SV_A1_2]])
%r3 = subtensor_insert %t1 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
return %r0, %r1, %r2, %r3: tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
}
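
// In buffer form, an inplace tensor.insert_slice is just a memref.subview of
// the destination plus a copy of the source into it, as the CHECK lines above
// show. Standalone sketch (made-up function name, memref operands assumed):
func @insert_slice_as_subview_copy_sketch(%dst : memref<?xf32>, %src : memref<4xf32>) {
  %sv = memref.subview %dst[0] [4] [1] : memref<?xf32> to memref<4xf32>
  // Writing into the subview updates %dst in place.
  linalg.copy(%src, %sv) : memref<4xf32>, memref<4xf32>
  return
}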
// -----
// CHECK-LABEL: func @subtensor_insert_fun
func @subtensor_insert_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
// CHECK-LABEL: func @insert_slice_fun
func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
-> tensor<?xf32>
{
%f0 = constant 0.0 : f32
@ -172,7 +172,7 @@ func @subtensor_insert_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t :
// CHECK-NOT: alloc
// CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]]
// CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]])
%r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
/// Overwrite BUFFER_CAST_A inplace.
// CHECK: linalg.fill(%[[BUFFER_CAST_A]]
@ -182,8 +182,8 @@ func @subtensor_insert_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t :
// -----
// CHECK-LABEL: func @subtensor_insert_fun
func @subtensor_insert_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
// CHECK-LABEL: func @insert_slice_fun
func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
-> tensor<?xf32>
{
%f0 = constant 0.0 : f32
@ -198,15 +198,15 @@ func @subtensor_insert_fun(%A : tensor<?xf32> {linalg.inplaceable = true}, %t :
// CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]]
/// Overwrite BUFFER_CAST_A inplace by copying into the subview.
// CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]])
%r1 = subtensor_insert %t into %r0[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor<?xf32>
return %r1: tensor<?xf32>
}
// -----
// CHECK-LABEL: func @subtensor_insert_fun_not_inplace
func @subtensor_insert_fun_not_inplace(%A : tensor<?xf32>, %t : tensor<4xf32>)
// CHECK-LABEL: func @insert_slice_fun_not_inplace
func @insert_slice_fun_not_inplace(%A : tensor<?xf32>, %t : tensor<4xf32>)
-> tensor<?xf32>
{
// CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref<?xf32
@ -217,14 +217,14 @@ func @subtensor_insert_fun_not_inplace(%A : tensor<?xf32>, %t : tensor<4xf32>)
// CHECK: %[[SV:.*]] = memref.subview %[[ALLOC]][0] [4] [1] : memref<?xf32> to memref<4xf32>
// CHECK: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) : memref<4xf32, #map>, memref<4xf32>
// CHECK: memref.dealloc %[[ALLOC]] : memref<?xf32>
%r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
return %r0: tensor<?xf32>
}
// -----
// CHECK-LABEL: func @subtensor_insert_fun_not_inplace
func @subtensor_insert_fun_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
// CHECK-LABEL: func @insert_slice_fun_not_inplace
func @insert_slice_fun_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
-> (tensor<?xf32>, tensor<?xf32>)
{
%f0 = constant 0.0 : f32
@ -232,10 +232,10 @@ func @subtensor_insert_fun_not_inplace(%A : tensor<?xf32> {linalg.inplaceable =
// CHECK-DAG: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref<?xf32{{.*}}
// CHECK-DAG: %[[BUFFER_CAST_B:.*]] = memref.buffer_cast {{.*}} : memref<4xf32{{.*}}
// subtensor_insert is bufferized first, %A is inplaceable so we can make this inplace
// tensor.insert_slice is bufferized first, %A is inplaceable so we can make this inplace
// CHECK-DAG: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]][0] [4] [1] : memref<?xf32, {{.*}}> to memref<4xf32, {{.*}}>
// CHECK-DAG: linalg.copy(%[[BUFFER_CAST_B]], %[[SV]]) : memref<4xf32, {{.*}}>, memref<4xf32, {{.*}}>
%r0 = subtensor_insert %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
%r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
// fill would interfere with %r0 that is also being returned.
// So we need to bufferize it out of place and make a new alloc.
@ -253,8 +253,8 @@ func @subtensor_insert_fun_not_inplace(%A : tensor<?xf32> {linalg.inplaceable =
// -----
// CHECK-LABEL: func @subtensor_fun
func @subtensor_fun(%A : tensor<?xf32> {linalg.inplaceable = true})
// CHECK-LABEL: func @extract_slice_fun
func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true})
-> tensor<4xf32>
{
// This bufferizes to a pattern that the cross-function boundary pass needs to
@ -268,9 +268,8 @@ func @subtensor_fun(%A : tensor<?xf32> {linalg.inplaceable = true})
// CHECK: %[[BUFFER_CAST_A:.*]] = memref.buffer_cast {{.*}} : memref<?xf32
// CHECK: %[[SV:.*]] = memref.subview %[[BUFFER_CAST_A]][0] [4] [1]
// CHECK: %[[RES:.*]] = memref.tensor_load %[[SV]]
%r0 = subtensor %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: return %[[RES]]
return %r0: tensor<4xf32>
}
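
// Standalone sketch (hypothetical, not part of this commit) of the buffer form
// produced above: the slice becomes a memref.subview of the casted buffer, and
// the result is reloaded as a tensor at the function boundary:
func @extract_slice_buffer_form_sketch(%A : tensor<?xf32>) -> tensor<4xf32> {
  %m = memref.buffer_cast %A : memref<?xf32>
  %sv = memref.subview %m[0] [4] [1] : memref<?xf32> to memref<4xf32>
  %r = memref.tensor_load %sv : memref<4xf32>
  return %r : tensor<4xf32>
}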


@ -299,28 +299,28 @@ func @fold_unit_dim_for_init_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32>
// -----
func @fold_subtensor(
func @fold_slice(
%arg0 : tensor<1x?x?x1x?x1x1xf32>, %arg1 : tensor<1x?x?x?x?x1x1xf32>,
%arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index,
%arg6 : index, %arg7 : index) -> (tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>) {
%0 = subtensor %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0]
[1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
%0 = tensor.extract_slice %arg0[0, %arg2, %arg3, 0, %arg4, 0, 0]
[1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
tensor<1x?x?x1x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
%1 = subtensor %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0]
[1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
%1 = tensor.extract_slice %arg1[%arg2, 0, %arg3, 0, 0, %arg4, 0]
[1, %arg5, %arg6, 1, %arg7, 1, 1] [1, 1, 1, 1, 1, 1, 1] :
tensor<1x?x?x?x?x1x1xf32> to tensor<1x?x?x1x?x1x1xf32>
return %0, %1 : tensor<1x?x?x1x?x1x1xf32>, tensor<1x?x?x1x?x1x1xf32>
}
// CHECK: func @fold_subtensor
// CHECK: func @fold_slice
// CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x?x1x?x1x1xf32>
// CHECK-SAME: %[[ARG1:.+]]: tensor<1x?x?x?x?x1x1xf32>
// CHECK: %[[SUBTENSOR1:.+]] = subtensor %[[ARG0]]
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-SAME: to tensor<?x?x?xf32>
// CHECK: %[[RESULT1:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR1]]
// CHECK: %[[RESULT1:.+]] = linalg.tensor_expand_shape %[[SLICE1]]
// CHECK-SAME: [0, 1], [2], [3, 4, 5, 6]
// CHECK: %[[SUBTENSOR2:.+]] = subtensor %[[ARG1]]
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[ARG1]]
// CHECK-SAME: to tensor<?x?x?xf32>
// CHECK: %[[RESULT2:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR2]]
// CHECK: %[[RESULT2:.+]] = linalg.tensor_expand_shape %[[SLICE2]]
// CHECK-SAME: [0, 1], [2], [3, 4, 5, 6]
// CHECK: return %[[RESULT1]], %[[RESULT2]]
@ -430,25 +430,25 @@ func @unit_dim_for_reduction_inner(%arg0: tensor<?x1x?x1xf32>) -> tensor<?x1xf32
// -----
func @subtensor_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> {
%0 = subtensor %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32>
func @slice_unit_dims(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> {
%0 = tensor.extract_slice %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<1x1xf32>
return %0 : tensor<1x1xf32>
}
// CHECK-LABEL: func @subtensor_unit_dims
// CHECK: %[[SUBTENSOR:.+]] = subtensor
// CHECK-LABEL: func @slice_unit_dims
// CHECK: %[[SLICE:.+]] = tensor.extract_slice
// CHECK-SAME: tensor<1x3xf32> to tensor<f32>
// CHECK: %[[RESULT:.+]] = linalg.tensor_expand_shape %[[SUBTENSOR]] []
// CHECK: %[[RESULT:.+]] = linalg.tensor_expand_shape %[[SLICE]] []
// CHECK: return %[[RESULT]]
// -----
func @subtensor_insert_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> {
%0 = subtensor_insert %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32>
func @insert_slice_unit_dims(%arg0: tensor<1x3xf32>, %arg1: tensor<1x1xf32>) -> tensor<1x3xf32> {
%0 = tensor.insert_slice %arg1 into %arg0[0, 2] [1, 1] [1, 1] : tensor<1x1xf32> into tensor<1x3xf32>
return %0 : tensor<1x3xf32>
}
// CHECK-LABEL: func @subtensor_insert_unit_dims
// CHECK-LABEL: func @insert_slice_unit_dims
// CHECK: %[[RESHAPE:.+]] = linalg.tensor_collapse_shape %{{.+}} []
// CHECK: %[[RESULT:.+]] = subtensor_insert %[[RESHAPE]]
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[RESHAPE]]
// CHECK-SAME: tensor<f32> into tensor<1x3xf32>
// CHECK: return %[[RESULT]]
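
// The rank-reducing form these patterns produce, written out explicitly
// (sketch under the assumption that all unit dims are dropped; function name
// is made up):
func @rank_reduced_slice_sketch(%arg0: tensor<1x3xf32>) -> tensor<1x1xf32> {
  %0 = tensor.extract_slice %arg0[0, 2] [1, 1] [1, 1] : tensor<1x3xf32> to tensor<f32>
  %1 = linalg.tensor_expand_shape %0 [] : tensor<f32> into tensor<1x1xf32>
  return %1 : tensor<1x1xf32>
}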


@ -175,18 +175,18 @@ module {
// CHECK: %[[INIT:.+]] = linalg.init_tensor
// CHECK: %[[R0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG5:.+]] = %[[INIT]]) -> (tensor<?x?xf32>) {
// CHECK: %[[R1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG7:.+]] = %[[ARG5]]) -> (tensor<?x?xf32>) {
// CHECK-DAG: %[[STARG3:.+]] = subtensor %[[ARG3]]
// CHECK-DAG: %[[STARG7:.+]] = subtensor %[[ARG7]]
// CHECK-DAG: %[[STARG0:.+]] = subtensor %[[ARG0]]
// CHECK-DAG: %[[STARG1:.+]] = subtensor %[[ARG1]]
// CHECK-DAG: %[[STARG2:.+]] = subtensor %[[ARG2]]
// CHECK-DAG: %[[STARG3:.+]] = tensor.extract_slice %[[ARG3]]
// CHECK-DAG: %[[STARG7:.+]] = tensor.extract_slice %[[ARG7]]
// CHECK-DAG: %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-DAG: %[[STARG1:.+]] = tensor.extract_slice %[[ARG1]]
// CHECK-DAG: %[[STARG2:.+]] = tensor.extract_slice %[[ARG2]]
// CHECK: %[[T0:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[STARG0]], %[[STARG1]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[STARG2]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: %[[T1:.+]] = linalg.generic
// CHECK-SAME: ins(%[[T0:.+]], %[[STARG3]] : tensor<?x?xf32>, tensor<?xf32>)
// CHECK-SAME: outs(%[[STARG7]] : tensor<?x?xf32>)
// CHECK: %[[RESULT:.+]] = subtensor_insert %[[T1]] into %[[ARG7]]
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[T1]] into %[[ARG7]]
// CHECK: scf.yield %[[RESULT]]
// CHECK: }
// CHECK: scf.yield %[[R1]]
@ -229,21 +229,21 @@ module {
// CHECK: %[[M_1:.+]] = memref.dim %[[ARG8]], %[[C0]]
// CHECK: %[[TILE_M_1:.+]] = affine.min #[[MAP0]](%[[M_1]], %[[IV0]])
// CHECK: %[[N3:.+]] = memref.dim %[[ARG8]], %[[C1]]
// CHECK: %[[STARG6:.+]] = subtensor %[[ARG8]][%[[IV0]], 0]
// CHECK: %[[STARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_1]], %[[N3]]]
// CHECK: %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]]
// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_2]], %[[M]]]
// CHECK: %[[N2:.+]] = memref.dim %[[ARG4]], %[[C1]]
// CHECK: %[[STARG4:.+]] = subtensor %[[ARG4]][%[[IV0]], 0]
// CHECK: %[[STARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_2]], %[[N2]]]
// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M]], %[[M]]]
// CHECK: %[[N0:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK: %[[STARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0]
// CHECK: %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_3]], %[[N0]]]
// CHECK: %[[M_3:.+]] = memref.dim %[[ARG2]], %[[C0]]
// CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_3]], %[[M]]]
// CHECK: %[[N1:.+]] = memref.dim %[[ARG2]], %[[C1]]
// CHECK: %[[STARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], 0]
// CHECK: %[[STARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_4]], %[[N1]]]
// CHECK: %[[T0:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[STARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>
@ -254,7 +254,7 @@ module {
// CHECK: %[[T2:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[T1]], %arg5 : tensor<?x?xf32>, tensor<?x?xf32>
// CHECK-SAME: ) outs(%[[STARG6]] : tensor<?x?xf32>)
// CHECK: %[[R1:.+]] = subtensor_insert %[[T2]]
// CHECK: %[[R1:.+]] = tensor.insert_slice %[[T2]]
// CHECK-SAME: into %[[ARG8]][%[[IV0]], 0] [%[[TILE_M_1]], %[[N3]]]
// CHECK: scf.yield %[[R1]] : tensor<?x?xf32>
// CHECK: }
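
// The per-iteration structure these CHECK lines describe, reduced to its
// essentials (illustrative sketch with made-up names and a single operand):
func @tiled_iteration_sketch(%in : tensor<?x?xf32>, %out : tensor<?x?xf32>,
                             %iv : index, %ts : index) -> tensor<?x?xf32> {
  %s_in = tensor.extract_slice %in[%iv, 0] [%ts, %ts] [1, 1]
      : tensor<?x?xf32> to tensor<?x?xf32>
  %s_out = tensor.extract_slice %out[%iv, 0] [%ts, %ts] [1, 1]
      : tensor<?x?xf32> to tensor<?x?xf32>
  %res = linalg.matmul ins(%s_in, %s_in : tensor<?x?xf32>, tensor<?x?xf32>)
                       outs(%s_out : tensor<?x?xf32>) -> tensor<?x?xf32>
  // Write the computed tile back into the enclosing iter_arg / output tensor.
  %upd = tensor.insert_slice %res into %out[%iv, 0] [%ts, %ts] [1, 1]
      : tensor<?x?xf32> into tensor<?x?xf32>
  return %upd : tensor<?x?xf32>
}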


@ -38,16 +38,16 @@ module {
// CHECK: %[[M_2:.+]] = memref.dim %[[ARG6]], %[[C0]]
// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[M_2]], %[[IV0]])
// CHECK: %[[N3:.+]] = memref.dim %[[ARG6]], %[[C1]]
// CHECK: %[[ST_ARG6:.+]] = subtensor %[[ARG6]][%[[IV0]], 0]
// CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_2]], %[[N3]]]
// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M]], %[[M]]]
// CHECK: %[[N1:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK: %[[ST_ARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0]
// CHECK: %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_3]], %[[N1]]]
// CHECK: %[[M_3:.+]] = memref.dim %[[ARG2]], %[[C0]]
// CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M_3]], %[[M]]]
// CHECK: %[[N2_2:.+]] = memref.dim %[[ARG2]], %[[C1]]
// CHECK: %[[ST_ARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], 0]
// CHECK: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_4]], %[[N2_2]]]
// CHECK: %[[LHS:.+]] = linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_producer"
@ -62,30 +62,30 @@ module {
// CHECK-SAME: %[[C0]] to %[[N2]] step %[[C16]]
// CHECK-SAME: iter_args(%[[ARG10:.+]] = %[[ARG8]]) -> (tensor<?x?xf32>) {
// CHECK: %[[TILE_N2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2]]]
// CHECK: %[[ST_LHS:.+]] = subtensor %[[LHS]][0, %[[IV2]]]
// CHECK: %[[ST_LHS:.+]] = tensor.extract_slice %[[LHS]][0, %[[IV2]]]
// CHECK-SAME: [%[[TILE_M_3]], %[[TILE_N2]]]
// CHECK: %[[N2_3:.+]] = memref.dim %[[ARG3]], %[[C0]]
// CHECK: %[[TILE_N2_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2_3]]]
// CHECK: %[[TILE_N3:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N3_2]]]
// CHECK: %[[ST_ARG3:.+]] = subtensor %[[ARG3]][%[[IV2]], %[[IV1]]]
// CHECK: %[[ST_ARG3:.+]] = tensor.extract_slice %[[ARG3]][%[[IV2]], %[[IV1]]]
// CHECK-SAME: [%[[TILE_N2_2]], %[[TILE_N3]]]
// CHECK: %[[M_4:.+]] = memref.dim %[[ARG10]], %[[C0]]
// CHECK: %[[N3_3:.+]] = memref.dim %[[ARG10]], %[[C1]]
// CHECK: %[[TILE_N3_2:.+]] = affine.min #[[MAP4]](%[[N3_3]], %[[IV1]])
// CHECK: %[[ST_ARG4:.+]] = subtensor %[[ARG10]][0, %[[IV1]]]
// CHECK: %[[ST_ARG4:.+]] = tensor.extract_slice %[[ARG10]][0, %[[IV1]]]
// CHECK-SAME: [%[[M_4]], %[[TILE_N3_2]]]
// CHECK: %[[ST_RESULT:.+]] = linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion"
// CHECK-SAME: ins(%[[ST_LHS]], %[[ST_ARG3]]
// CHECK-SAME: : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[ST_ARG4]] : tensor<?x?xf32>)
// CHECK: %[[UPDATE1:.+]] = subtensor_insert %[[ST_RESULT]]
// CHECK: %[[UPDATE1:.+]] = tensor.insert_slice %[[ST_RESULT]]
// CHECK-SAME: into %[[ARG10]][0, %[[IV1]]] [%[[M_4]], %[[TILE_N3_2]]]
// CHECK: scf.yield %[[UPDATE1]]
// CHECK: }
// CHECK: scf.yield %[[YIELD1]]
// CHECK: }
// CHECK: %[[UPDATE0:.+]] = subtensor_insert %[[YIELD0]] into
// CHECK: %[[UPDATE0:.+]] = tensor.insert_slice %[[YIELD0]] into
// CHECK-SAME: %[[ARG6]][%[[IV0]], 0] [%[[TILE_M_2]], %[[N3]]]
// CHECK: scf.yield %[[UPDATE0]]
// CHECK: }
@ -114,9 +114,9 @@ module {
// TLOOP-SAME: %[[AB_INIT_:.*]] = %[[AB_INIT]]: tensor<?x?xf32>)
// TLOOP-SAME: outs (%[[ABC_INIT_:.*]] = %[[ABC_INIT]]: tensor<?x?xf32>) {
// TLOOP: %[[ABC_INIT_SUB:.*]] = subtensor %[[ABC_INIT_]][%[[IV0]], 0]
// TLOOP: %[[A_SUB:.*]] = subtensor %[[A_]][%[[IV0]], 0]
// TLOOP: %[[AB_INIT_SUB:.*]] = subtensor %[[AB_INIT_]][%[[IV0]], 0]
// TLOOP: %[[ABC_INIT_SUB:.*]] = tensor.extract_slice %[[ABC_INIT_]][%[[IV0]], 0]
// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[IV0]], 0]
// TLOOP: %[[AB_INIT_SUB:.*]] = tensor.extract_slice %[[AB_INIT_]][%[[IV0]], 0]
// TLOOP: %[[AB_SUB:.*]] = linalg.matmul
// TLOOP-SAME: ins(%[[A_SUB]], %[[B_]] : {{.*}}) outs(%[[AB_INIT_SUB]]
@ -132,19 +132,19 @@ module {
// TLOOP-SAME: outs (%[[ABC_INIT_SUB_:.*]] = %[[ABC_INIT_SUB]]: [[TY]])
// TLOOP-SAME: iterators["parallel", "reduction"] {
// TLOOP: %[[AB_SUB_SUB:.*]] = subtensor %[[AB_SUB_]][0, %[[IV2]]]
// TLOOP: %[[C__SUB:.*]] = subtensor %[[C__]][%[[IV2]], %[[IV1]]]
// TLOOP: %[[ABS_INIT_SUB_SUB:.*]] = subtensor %[[ABC_INIT_SUB_]][0, %[[IV1]]]
// TLOOP: %[[AB_SUB_SUB:.*]] = tensor.extract_slice %[[AB_SUB_]][0, %[[IV2]]]
// TLOOP: %[[C__SUB:.*]] = tensor.extract_slice %[[C__]][%[[IV2]], %[[IV1]]]
// TLOOP: %[[ABS_INIT_SUB_SUB:.*]] = tensor.extract_slice %[[ABC_INIT_SUB_]][0, %[[IV1]]]
// TLOOP: %[[ABC_SUB_SUB:.*]] = linalg.matmul
// TLOOP-SAME: ins(%[[AB_SUB_SUB]], %[[C__SUB]] : [[TY]], [[TY]])
// TLOOP-SAME: outs(%[[ABS_INIT_SUB_SUB]] : [[TY]]) -> [[TY]]
// TLOOP: %[[RES0:.*]] = subtensor_insert %[[ABC_SUB_SUB]]
// TLOOP: %[[RES0:.*]] = tensor.insert_slice %[[ABC_SUB_SUB]]
// TLOOP-SAME: into %[[ABC_INIT_SUB_]][0, %[[IV1]]]
// TLOOP: linalg.yield %[[RES0]] : [[TY]]
// TLOOP: }
// TLOOP: %[[RES1:.*]] = subtensor_insert %[[ABC_SUB_]] into %[[ABC_INIT_]][%[[IV0]], 0]
// TLOOP: %[[RES1:.*]] = tensor.insert_slice %[[ABC_SUB_]] into %[[ABC_INIT_]][%[[IV0]], 0]
// TLOOP: linalg.yield %[[RES1]] : [[TY]]
// TLOOP: }
// TLOOP: return %[[ABC]] : [[TY]]
@ -186,10 +186,10 @@ module {
// CHECK-SAME: iter_args(%[[ARG4:.+]] = %{{[a-zA-Z0-9_]+}})
// CHECK: %[[YIELD:.+]] = scf.for %[[IV1:[a-zA-Z0-9_]+]]
// CHECK-SAME: iter_args(%[[ARG6:.+]] = %[[ARG4]])
// CHECK: %[[ST_ARG6:.+]] = subtensor %[[ARG6]][%[[IV0]], %[[IV1]]]
// CHECK: %[[ST_ARG0:.+]] = subtensor %[[ARG0]][%[[IV0]], 0]
// CHECK: %[[ST_ARG1:.+]] = subtensor %[[ARG1]][0, %[[IV1]]]
// CHECK: %[[ST_ARG2:.+]] = subtensor %[[ARG2]][%[[IV0]], %[[IV1]]]
// CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
// CHECK: %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
// CHECK: %[[ST_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
// CHECK: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
// CHECK: %[[LHS:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]]
// CHECK-SAME: : tensor<?x?xf32>, tensor<?x?xf32>)
@ -197,7 +197,7 @@ module {
// CHECK: %[[ST_RESULT:.+]] = linalg.generic
// CHECK-SAME: ins(%[[LHS]] : tensor<?x?xf32>)
// CHECK-SAME: outs(%[[ST_ARG6]] : tensor<?x?xf32>)
// CHECK: %[[UPDATE:.+]] = subtensor_insert %[[ST_RESULT]]
// CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
// CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
// CHECK: scf.yield %[[UPDATE]]
// CHECK: scf.yield %[[YIELD]]
@ -226,10 +226,10 @@ module {
// TLOOP-SAME: %[[AB_:.*]] = %[[AB]]: [[TY]])
// TLOOP-SAME: outs (%[[INIT_:.*]] = %[[INIT]]: [[TY]]) {
// TLOOP: %[[INIT_SUB:.*]] = subtensor %[[INIT_]][%[[IV0]], %[[IV1]]]
// TLOOP: %[[A_SUB:.*]] = subtensor %[[A_]][%[[IV0]], 0]
// TLOOP: %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[IV1]]]
// TLOOP: %[[AB_SUB_INIT:.*]] = subtensor %[[AB_]][%[[IV0]], %[[IV1]]]
// TLOOP: %[[INIT_SUB:.*]] = tensor.extract_slice %[[INIT_]][%[[IV0]], %[[IV1]]]
// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[IV0]], 0]
// TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[IV1]]]
// TLOOP: %[[AB_SUB_INIT:.*]] = tensor.extract_slice %[[AB_]][%[[IV0]], %[[IV1]]]
// TLOOP: %[[AB_SUB:.*]] = linalg.matmul
// TLOOP-SAME: ins(%[[A_SUB]], %[[B_SUB]] : [[TY]], [[TY]])
@ -238,7 +238,7 @@ module {
// TLOOP: %[[DOUBLE_AB:.*]] = linalg.generic
// TLOOP-SAME: ins(%[[AB_SUB]] : [[TY]]) outs(%[[INIT_SUB]] : [[TY]])
// TLOOP: %[[RESULT_SUB:.*]] = subtensor_insert
// TLOOP: %[[RESULT_SUB:.*]] = tensor.insert_slice
// TLOOP-SAME: %[[DOUBLE_AB:.*]] into %[[INIT_]][%[[IV0]], %[[IV1]]]
// TLOOP: linalg.yield %[[RESULT_SUB]] : [[TY]]
@ -267,13 +267,13 @@ module {
// CHECK-NOT: fill
// CHECK: scf.for %[[I:.*]]{{.*}}iter_args(%{{.*}} = %[[ARG0]]) -> (tensor<?x?xf32>) {
// CHECK: scf.for %[[J:.*]]
// CHECK: %[[ST:.*]] = subtensor %[[ARG0]]
// CHECK: %[[ST:.*]] = tensor.extract_slice %[[ARG0]]
// CHECK: %[[ST_FILL:.*]] = linalg.fill(%[[ST]], %[[C0]]) {__internal_linalg_transform__ = "after_out_fusion_producer"} : tensor<?x?xf32>, f32 -> tensor<?x?xf32>
// CHECK: %[[ST_MM_RES:.*]] = scf.for %[[K:.*]]{{.*}}iter_args(%[[BB:.*]] = %[[ST_FILL]]) -> (tensor<?x?xf32>) {
// CHECK-NOT: fill
// CHECK: %[[ST_MM:.*]] = linalg.matmul {__internal_linalg_transform__ = "after_out_fusion"} ins(%{{.*}}, %{{.*}} : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[BB]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: scf.yield %[[ST_MM]] : tensor<?x?xf32>
// CHECK: %[[MM:.*]] = subtensor_insert %[[ST_MM_RES]] into {{.*}}
// CHECK: %[[MM:.*]] = tensor.insert_slice %[[ST_MM_RES]] into {{.*}}
// CHECK: scf.yield %[[MM]] : tensor<?x?xf32>
@ -300,9 +300,9 @@ module {
// TLOOP-SAME: outs (%[[OUT_:.*]] = %[[OUT]]: [[TY]]) {
// TLOOP: %[[DIM_A__1:.*]] = memref.dim %[[A_]], %[[C1]] : [[TY]]
// TLOOP: %[[A_SUB:.*]] = subtensor %[[A_]][%[[I]], 0]
// TLOOP: %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[J]]]
// TLOOP: %[[OUT_SUB:.*]] = subtensor %[[OUT_]][%[[I]], %[[J]]]
// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0]
// TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]]
// TLOOP: %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]]
// TLOOP: %[[INIT_SUB:.*]] = linalg.fill(%[[OUT_SUB]], %[[C0_F32]])
// TLOOP: %[[AB_SUB:.*]] = linalg.tiled_loop (%[[K:.*]]) = (%[[C0]])
@ -312,15 +312,15 @@ module {
// TLOOP-SAME: outs (%[[INIT_SUB_:.*]] = %[[INIT_SUB]]: [[TY]])
// TLOOP-SAME: iterators["reduction"] {
// TLOOP: %[[A_SUB_SUB:.*]] = subtensor %[[A_SUB_]][0, %[[K]]]
// TLOOP: %[[B_SUB_SUB:.*]] = subtensor %[[B_SUB_]][%[[K]], 0]
// TLOOP: %[[A_SUB_SUB:.*]] = tensor.extract_slice %[[A_SUB_]][0, %[[K]]]
// TLOOP: %[[B_SUB_SUB:.*]] = tensor.extract_slice %[[B_SUB_]][%[[K]], 0]
// TLOOP: %[[AB_SUB_SUB:.*]] = linalg.matmul
// TLOOP-SAME: ins(%[[A_SUB_SUB]], %[[B_SUB_SUB]] : [[TY]], [[TY]])
// TLOOP-SAME: outs(%[[INIT_SUB_]] : [[TY]]) -> [[TY]]
// TLOOP: linalg.yield %[[AB_SUB_SUB]] : [[TY]]
// TLOOP: }
// TLOOP: %[[SUB_RESULT:.*]] = subtensor_insert %[[AB_SUB]]
// TLOOP: %[[SUB_RESULT:.*]] = tensor.insert_slice %[[AB_SUB]]
// TLOOP-SAME: into %[[OUT_]][%[[I]], %[[J]]]
// TLOOP: linalg.yield %[[SUB_RESULT]] : [[TY]]
// TLOOP: }
@ -371,9 +371,9 @@ module {
// TLOOP-SAME: outs (%[[OUT_:.*]] = %[[OUT]]: [[TY]]) {
// TLOOP: %[[DIM_A__1:.*]] = memref.dim %[[A_]], %[[C1]] : [[TY]]
// TLOOP: %[[A_SUB:.*]] = subtensor %[[A_]][%[[I]], 0]
// TLOOP: %[[B_SUB:.*]] = subtensor %[[B_]][0, %[[J]]]
// TLOOP: %[[OUT_SUB:.*]] = subtensor %[[OUT_]][%[[I]], %[[J]]]
// TLOOP: %[[A_SUB:.*]] = tensor.extract_slice %[[A_]][%[[I]], 0]
// TLOOP: %[[B_SUB:.*]] = tensor.extract_slice %[[B_]][0, %[[J]]]
// TLOOP: %[[OUT_SUB:.*]] = tensor.extract_slice %[[OUT_]][%[[I]], %[[J]]]
// TLOOP: %[[INIT_SUB:.*]] = linalg.generic
// TLOOP-SAME: ins(%[[C0_F32_]]
// TLOOP-SAME: outs(%[[OUT_SUB]]
@ -385,15 +385,15 @@ module {
// TLOOP-SAME: outs (%[[INIT_SUB_:.*]] = %[[INIT_SUB]]: [[TY]])
// TLOOP-SAME: iterators["reduction"] {
// TLOOP: %[[A_SUB_SUB:.*]] = subtensor %[[A_SUB_]][0, %[[K]]]
// TLOOP: %[[B_SUB_SUB:.*]] = subtensor %[[B_SUB_]][%[[K]], 0]
// TLOOP: %[[A_SUB_SUB:.*]] = tensor.extract_slice %[[A_SUB_]][0, %[[K]]]
// TLOOP: %[[B_SUB_SUB:.*]] = tensor.extract_slice %[[B_SUB_]][%[[K]], 0]
// TLOOP: %[[AB_SUB_SUB:.*]] = linalg.matmul
// TLOOP-SAME: ins(%[[A_SUB_SUB]], %[[B_SUB_SUB]] : [[TY]], [[TY]])
// TLOOP-SAME: outs(%[[INIT_SUB_]] : [[TY]]) -> [[TY]]
// TLOOP: linalg.yield %[[AB_SUB_SUB]] : [[TY]]
// TLOOP: }
// TLOOP: %[[SUB_RESULT:.*]] = subtensor_insert %[[AB_SUB]]
// TLOOP: %[[SUB_RESULT:.*]] = tensor.insert_slice %[[AB_SUB]]
// TLOOP-SAME: into %[[OUT_]][%[[I]], %[[J]]]
// TLOOP: linalg.yield %[[SUB_RESULT]] : [[TY]]
// TLOOP: }


@ -53,10 +53,10 @@ func @matmul_tensors(
// CHECK: %[[A:.*]] = scf.for %[[J1:[0-9a-z]+]] =
// Iteration count along J1
// CHECK: %[[IDXpad0_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[J1]])
// CHECK: subtensor %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: tensor.extract_slice %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: linalg.pad_tensor %{{.*}}
// CHECK: : tensor<?x?xf32> to tensor<2x4xf32>
// CHECK: subtensor_insert %{{.*}} into %{{.*}}[%[[IDXpad0_K]], 0, 0]
// CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%[[IDXpad0_K]], 0, 0]
// CHECK-SAME: [1, 2, 4] [1, 1, 1] : tensor<2x4xf32> into tensor<?x2x4xf32>
// Second tensor is KxN but loop order is (M, N, K) so padded tensor is NxKx4x3
// CHECK: %[[SZpad1_N:[0-9]+]] = affine.apply #[[$DIVS3]]()[%[[dN]]]
@ -69,23 +69,23 @@ func @matmul_tensors(
// CHECK: scf.for %[[J2:[0-9a-z]+]] =
// Iteration count along J2
// CHECK: %[[IDXpad1_N:[0-9]+]] = affine.apply #[[$DIV4]](%[[J2]])
// CHECK: subtensor %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: tensor.extract_slice %{{.*}} [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: linalg.pad_tensor %{{.*}}
// CHECK: : tensor<?x?xf32> to tensor<4x3xf32>
// CHECK: subtensor_insert %{{.*}} into %{{.*}}[%[[IDXpad1_K]], %[[IDXpad1_N]], 0, 0]
// CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%[[IDXpad1_K]], %[[IDXpad1_N]], 0, 0]
// CHECK-SAME: [1, 1, 4, 3] [1, 1, 1, 1] : tensor<4x3xf32> into tensor<?x?x4x3xf32>
// 2-D loop
// CHECK: scf.for %[[J:[0-9a-zA-Z]+]]
// CHECK: scf.for %[[K:[0-9a-zA-Z]+]]
// Iteration count along K
// CHECK: %[[IDXpad0_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[K]])
// CHECK: %[[stA:.*]] = subtensor %[[A]][%[[IDXpad0_K]], 0, 0] [1, 2, 4] [1, 1, 1] :
// CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[IDXpad0_K]], 0, 0] [1, 2, 4] [1, 1, 1] :
// CHECK-SAME: tensor<?x2x4xf32> to tensor<2x4xf32>
// Iteration count along J
// CHECK: %[[IDXpad1_N:[0-9]+]] = affine.apply #[[$DIV3]](%[[J]])
// Iteration count along K
// CHECK: %[[IDXpad1_K:[0-9]+]] = affine.apply #[[$DIV4]](%[[K]])
// CHECK: %[[stB:.*]] = subtensor %[[B]][%[[IDXpad1_N]], %[[IDXpad1_K]], 0, 0] [1, 1, 4, 3] [1, 1, 1, 1] :
// CHECK: %[[stB:.*]] = tensor.extract_slice %[[B]][%[[IDXpad1_N]], %[[IDXpad1_K]], 0, 0] [1, 1, 4, 3] [1, 1, 1, 1] :
// CHECK-SAME: tensor<?x?x4x3xf32> to tensor<4x3xf32>
// CHECK: %[[stC:.*]] = linalg.pad_tensor %{{.*}}
// CHECK: : tensor<?x?xf32> to tensor<2x3xf32>
@ -98,17 +98,17 @@ func @matmul_tensors(
%7 = affine.min #map0(%arg3)[%6]
%8 = memref.dim %arg0, %c1 : tensor<?x?xf32>
%9 = affine.min #map1(%arg7)[%8]
%10 = subtensor %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%10 = tensor.extract_slice %arg0[%arg3, %arg7] [%7, %9] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%11 = memref.dim %arg1, %c0 : tensor<?x?xf32>
%12 = affine.min #map1(%arg7)[%11]
%13 = memref.dim %arg1, %c1 : tensor<?x?xf32>
%14 = affine.min #map2(%arg5)[%13]
%15 = subtensor %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%15 = tensor.extract_slice %arg1[%arg7, %arg5] [%12, %14] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%16 = memref.dim %arg8, %c0 : tensor<?x?xf32>
%17 = affine.min #map3(%16, %arg3)
%18 = memref.dim %arg8, %c1 : tensor<?x?xf32>
%19 = affine.min #map4(%18, %arg5)
%20 = subtensor %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%20 = tensor.extract_slice %arg8[%arg3, %arg5] [%17, %19] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%21 = subi %c2, %7 : index
%22 = subi %c4, %9 : index
%23 = linalg.pad_tensor %10 low[%c0, %c0] high[%21, %22] {
@ -128,8 +128,8 @@ func @matmul_tensors(
linalg.yield %cst : f32
} : tensor<?x?xf32> to tensor<2x3xf32>
%30 = linalg.matmul ins(%23, %26 : tensor<2x4xf32>, tensor<4x3xf32>) outs(%29 : tensor<2x3xf32>) -> tensor<2x3xf32>
%31 = subtensor %30[0, 0] [%7, %14] [1, 1] : tensor<2x3xf32> to tensor<?x?xf32>
%32 = subtensor_insert %31 into %arg8[%arg3, %arg5] [%17, %19] [%c1, %c1] : tensor<?x?xf32> into tensor<?x?xf32>
%31 = tensor.extract_slice %30[0, 0] [%7, %14] [1, 1] : tensor<2x3xf32> to tensor<?x?xf32>
%32 = tensor.insert_slice %31 into %arg8[%arg3, %arg5] [%17, %19] [%c1, %c1] : tensor<?x?xf32> into tensor<?x?xf32>
scf.yield %32 : tensor<?x?xf32>
}
scf.yield %5 : tensor<?x?xf32>
@ -173,7 +173,7 @@ func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
// CHECK: %[[INIT_PACKED_A:.*]] = linalg.init_tensor [%[[D0]], %[[D1]], 2] : tensor<?x?x2xf32>
// CHECK: %[[PACKED_A:.*]] = scf.for %[[II:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_A]]) -> (tensor<?x?x2xf32>) {
// CHECK: scf.for %[[III:[0-9a-z]+]] =
// CHECK: subtensor_insert %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
// CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
//
// CHECK: %[[D0_2:.*]] = affine.apply #[[$DIV4]](%[[MR8]])
// CHECK: %[[MM4_2:.*]] = affine.min #[[$MIN_MOD4]](%[[MR8]])
@ -182,33 +182,33 @@ func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
// CHECK: %[[INIT_PACKED_B:.*]] = linalg.init_tensor [%[[D0_2]], %[[D1_2]], 2] : tensor<?x?x2xf32>
// CHECK: %[[PACKED_B:.*]] = scf.for %[[II_2:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[INIT_PACKED_B]]) -> (tensor<?x?x2xf32>) {
// CHECK: scf.for %[[III_2:[0-9a-z]+]] =
// CHECK: subtensor_insert %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
// CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor<?x?x2xf32>
// Compute.
// CHECK: scf.for %[[II_3:[0-9a-z]+]] =
// CHECK: scf.for %[[III_3:[0-9a-z]+]] = {{.*}} iter_args(%[[C:.*]] = %{{.*}}) -> (tensor<f32>) {
// CHECK: %[[IDX0:.*]] = affine.apply #[[$DIV4]](%[[II_3]])
// CHECK: %[[IDX1:.*]] = affine.apply #[[$DIV2]](%[[III_3]])
// CHECK: %[[A:.*]] = subtensor %[[PACKED_A]][%[[IDX0]], %[[IDX1]], 0] [1, 1, 2] [1, 1, 1] : tensor<?x?x2xf32> to tensor<2xf32>
// CHECK: %[[A:.*]] = tensor.extract_slice %[[PACKED_A]][%[[IDX0]], %[[IDX1]], 0] [1, 1, 2] [1, 1, 1] : tensor<?x?x2xf32> to tensor<2xf32>
// CHECK: %[[IDX0_2:.*]] = affine.apply #[[$DIV4]](%[[II_3]])
// CHECK: %[[IDX1_2:.*]] = affine.apply #[[$DIV2]](%[[III_3]])
// CHECK: %[[B:.*]] = subtensor %[[PACKED_B]][%[[IDX0_2]], %[[IDX1_2]], 0] [1, 1, 2] [1, 1, 1] : tensor<?x?x2xf32> to tensor<2xf32>
// CHECK: %[[B:.*]] = tensor.extract_slice %[[PACKED_B]][%[[IDX0_2]], %[[IDX1_2]], 0] [1, 1, 2] [1, 1, 1] : tensor<?x?x2xf32> to tensor<2xf32>
// CHECK: linalg.dot ins(%[[A]], %[[B]] : tensor<2xf32>, tensor<2xf32>) outs(%[[C]] : tensor<f32>) -> tensor<f32>
%4 = scf.for %arg3 = %c0 to %1 step %c8 iter_args(%arg4 = %arg2) -> (tensor<f32>) {
%5 = affine.min #map0(%arg3)[%2]
%6 = subtensor %arg0[%arg3] [%5] [1] : tensor<?xf32> to tensor<?xf32>
%6 = tensor.extract_slice %arg0[%arg3] [%5] [1] : tensor<?xf32> to tensor<?xf32>
%7 = affine.min #map0(%arg3)[%3]
%8 = subtensor %arg1[%arg3] [%7] [1] : tensor<?xf32> to tensor<?xf32>
%8 = tensor.extract_slice %arg1[%arg3] [%7] [1] : tensor<?xf32> to tensor<?xf32>
%9 = scf.for %arg5 = %c0 to %5 step %c4 iter_args(%arg6 = %arg4) -> (tensor<f32>) {
%10 = affine.min #map1(%5, %arg5)
%11 = subtensor %6[%arg5] [%10] [1] : tensor<?xf32> to tensor<?xf32>
%11 = tensor.extract_slice %6[%arg5] [%10] [1] : tensor<?xf32> to tensor<?xf32>
%12 = affine.min #map1(%7, %arg5)
%13 = subtensor %8[%arg5] [%12] [1] : tensor<?xf32> to tensor<?xf32>
%13 = tensor.extract_slice %8[%arg5] [%12] [1] : tensor<?xf32> to tensor<?xf32>
%14 = scf.for %arg7 = %c0 to %10 step %c2 iter_args(%arg8 = %arg6) -> (tensor<f32>) {
%15 = affine.min #map2(%10, %arg7)
%16 = subtensor %11[%arg7] [%15] [1] : tensor<?xf32> to tensor<?xf32>
%16 = tensor.extract_slice %11[%arg7] [%15] [1] : tensor<?xf32> to tensor<?xf32>
%17 = affine.min #map2(%12, %arg7)
%18 = subtensor %13[%arg7] [%17] [1] : tensor<?xf32> to tensor<?xf32>
%18 = tensor.extract_slice %13[%arg7] [%17] [1] : tensor<?xf32> to tensor<?xf32>
%19 = subi %c2, %15 : index
%20 = linalg.pad_tensor %16 low[%c0] high[%19] {
^bb0(%arg9: index): // no predecessors
@ -245,17 +245,17 @@ func @matmul_2d_tiling(%arg0: tensor<32x128xf32>, %arg1: tensor<128x64xf32>, %ar
%1 = scf.for %arg3 = %c0 to %c32 step %c16 iter_args(%arg4 = %arg2) -> (tensor<32x64xf32>) {
%2 = scf.for %arg5 = %c0 to %c64 step %c32 iter_args(%arg6 = %arg4) -> (tensor<32x64xf32>) {
%3 = scf.for %arg7 = %c0 to %c128 step %c32 iter_args(%arg8 = %arg6) -> (tensor<32x64xf32>) {
%4 = subtensor %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32>
%5 = subtensor %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32>
%6 = subtensor %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32>
%4 = tensor.extract_slice %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32>
%5 = tensor.extract_slice %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32>
%6 = tensor.extract_slice %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32>
%7 = scf.for %arg9 = %c0 to %c16 step %c2 iter_args(%arg10 = %6) -> (tensor<16x32xf32>) {
%10 = scf.for %arg11 = %c0 to %c32 step %c4 iter_args(%arg12 = %arg10) -> (tensor<16x32xf32>) {
%11 = scf.for %arg13 = %c0 to %c32 step %c16 iter_args(%arg14 = %arg12) -> (tensor<16x32xf32>) {
%12 = subtensor %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32>
%12 = tensor.extract_slice %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32>
%13 = tensor.cast %12 : tensor<2x16xf32> to tensor<?x?xf32>
%14 = subtensor %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32>
%14 = tensor.extract_slice %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32>
%15 = tensor.cast %14 : tensor<16x4xf32> to tensor<?x?xf32>
%16 = subtensor %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32>
%16 = tensor.extract_slice %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32>
%17 = tensor.cast %16 : tensor<2x4xf32> to tensor<?x?xf32>
%18 = linalg.pad_tensor %13 low[%c0, %c0] high[%c0, %c0] {
^bb0(%arg15: index, %arg16: index): // no predecessors
@ -271,7 +271,7 @@ func @matmul_2d_tiling(%arg0: tensor<32x128xf32>, %arg1: tensor<128x64xf32>, %ar
} : tensor<?x?xf32> to tensor<2x4xf32>
%21 = linalg.matmul ins(%18, %19 : tensor<2x16xf32>, tensor<16x4xf32>) outs(%20 : tensor<2x4xf32>) -> tensor<2x4xf32>
%22 = tensor.cast %21 : tensor<2x4xf32> to tensor<?x?xf32>
%23 = subtensor_insert %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor<?x?xf32> into tensor<16x32xf32>
%23 = tensor.insert_slice %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor<?x?xf32> into tensor<16x32xf32>
scf.yield %23 : tensor<16x32xf32>
}
scf.yield %11 : tensor<16x32xf32>
@ -279,7 +279,7 @@ func @matmul_2d_tiling(%arg0: tensor<32x128xf32>, %arg1: tensor<128x64xf32>, %ar
scf.yield %10 : tensor<16x32xf32>
}
%8 = tensor.cast %7 : tensor<16x32xf32> to tensor<?x?xf32>
%9 = subtensor_insert %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor<?x?xf32> into tensor<32x64xf32>
%9 = tensor.insert_slice %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor<?x?xf32> into tensor<32x64xf32>
scf.yield %9 : tensor<32x64xf32>
}
scf.yield %3 : tensor<32x64xf32>


@ -321,14 +321,14 @@ func @hoist_vector_transfer_pairs_disjoint_tensor(
// -----
// CHECK-LABEL: func @hoist_vector_transfer_pairs_tensor_and_subtensors
// CHECK-LABEL: func @hoist_vector_transfer_pairs_tensor_and_slices
// CHECK-SAME: %[[TENSOR0:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[TENSOR1:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[TENSOR2:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[TENSOR3:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[TENSOR4:[a-zA-Z0-9]*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[TENSOR5:[a-zA-Z0-9]*]]: tensor<?x?xf32>
func @hoist_vector_transfer_pairs_tensor_and_subtensors(
func @hoist_vector_transfer_pairs_tensor_and_slices(
%tensor0: tensor<?x?xf32>, %tensor1: tensor<?x?xf32>, %tensor2: tensor<?x?xf32>,
%tensor3: tensor<?x?xf32>, %tensor4: tensor<?x?xf32>, %tensor5: tensor<?x?xf32>,
%val: index, %lb : index, %ub : index, %step: index) ->
@ -349,7 +349,7 @@ func @hoist_vector_transfer_pairs_tensor_and_subtensors(
-> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) {
// Hoisted
// CHECK: %[[ST0:.*]] = subtensor %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[ST0:.*]] = tensor.extract_slice %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[V0:.*]] = vector.transfer_read %[[ST0]]{{.*}} : tensor<?x?xf32>, vector<1xf32>
// CHECK: %[[R:.*]]:3 = scf.for %[[J:.*]] = {{.*}} iter_args(
@ -362,19 +362,19 @@ func @hoist_vector_transfer_pairs_tensor_and_subtensors(
iter_args(%arg6 = %arg0, %arg7 = %arg1, %arg8 = %arg2)
-> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) {
// Hoists.
%st0 = subtensor %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%st0 = tensor.extract_slice %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%r0 = vector.transfer_read %st0[%c0, %c0], %cst: tensor<?x?xf32>, vector<1xf32>
// CHECK: %[[ST1:.*]] = subtensor %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[ST1:.*]] = tensor.extract_slice %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[V1:.*]] = vector.transfer_read %[[ST1]]{{.*}} : tensor<?x?xf32>, vector<2xf32>
// Does not hoist (subtensor depends on %j)
%st1 = subtensor %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// Does not hoist (slice depends on %j)
%st1 = tensor.extract_slice %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%r1 = vector.transfer_read %st1[%c0, %c0], %cst: tensor<?x?xf32>, vector<2xf32>
// CHECK: %[[ST2:.*]] = subtensor %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[ST2:.*]] = tensor.extract_slice %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[V2:.*]] = vector.transfer_read %[[ST2]]{{.*}} : tensor<?x?xf32>, vector<3xf32>
// Does not hoist, 2 subtensor %arg8.
%st2 = subtensor %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// Does not hoist, 2 slices of %arg8.
%st2 = tensor.extract_slice %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%r2 = vector.transfer_read %st2[%c0, %c0], %cst: tensor<?x?xf32>, vector<3xf32>
// CHECK: %[[U0:.*]] = "some_use"(%[[V0_ARG_L2]]) : (vector<1xf32>) -> vector<1xf32>
@ -388,25 +388,25 @@ func @hoist_vector_transfer_pairs_tensor_and_subtensors(
%w0 = vector.transfer_write %u0, %st0[%c0, %c0] : vector<1xf32>, tensor<?x?xf32>
// CHECK-DAG: %[[STI1:.*]] = vector.transfer_write %[[U1]], %{{.*}} : vector<2xf32>, tensor<?x?xf32>
// Does not hoist (associated subtensor depends on %j).
// Does not hoist (associated slice depends on %j).
%w1 = vector.transfer_write %u1, %st1[%i, %i] : vector<2xf32>, tensor<?x?xf32>
// CHECK-DAG: %[[STI2:.*]] = vector.transfer_write %[[U2]], %{{.*}} : vector<3xf32>, tensor<?x?xf32>
// Does not hoist, 2 subtensor / subtensor_insert for %arg8.
// Does not hoist, 2 slice / insert_slice pairs for %arg8.
%w2 = vector.transfer_write %u2, %st2[%c0, %c0] : vector<3xf32>, tensor<?x?xf32>
// Hoists.
%sti0 = subtensor_insert %w0 into %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%sti0 = tensor.insert_slice %w0 into %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK-DAG: subtensor_insert %[[STI1]] into %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> into tensor<?x?xf32>
// CHECK-DAG: tensor.insert_slice %[[STI1]] into %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> into tensor<?x?xf32>
// Does not hoist (depends on %j).
%sti1 = subtensor_insert %w1 into %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%sti1 = tensor.insert_slice %w1 into %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK-DAG: subtensor_insert %[[STI2]] into %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> into tensor<?x?xf32>
// Does not hoist, 2 subtensor / subtensor_insert for %arg8.
%sti2 = subtensor_insert %w2 into %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%st22 = subtensor %sti2[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%sti22 = subtensor_insert %st22 into %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK-DAG: tensor.insert_slice %[[STI2]] into %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> into tensor<?x?xf32>
// Does not hoist, 2 slice / insert_slice pairs for %arg8.
%sti2 = tensor.insert_slice %w2 into %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%st22 = tensor.extract_slice %sti2[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%sti22 = tensor.insert_slice %st22 into %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: scf.yield {{.*}} : tensor<?x?xf32>, tensor<?x?xf32>, vector<1xf32>
// CHECK: }
@ -416,7 +416,7 @@ func @hoist_vector_transfer_pairs_tensor_and_subtensors(
// Hoisted
// CHECK: %[[STI0:.*]] = vector.transfer_write %[[R]]#2, %[[ST0]]{{.*}} : vector<1xf32>, tensor<?x?xf32>
// CHECK: subtensor_insert %[[STI0]] into %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}} : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: tensor.insert_slice %[[STI0]] into %[[TENSOR0_ARG]][%[[I]], %[[I]]]{{.*}} : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: scf.yield {{.*}} : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
scf.yield %1#0, %1#1, %1#2 :

View File

@ -732,11 +732,11 @@ func @tiled_loop(%lhs: tensor<24x64xi8>, %rhs: tensor<24x64xi8>,
%prod = linalg.tiled_loop (%i) = (%c0) to (%c24) step (%c4)
ins(%lhs_ = %lhs: tensor<24x64xi8>, %rhs_ = %rhs: tensor<24x64xi8>)
outs(%out_ = %out: tensor<24x64xi8>) {
%lhs_sub = subtensor %lhs_[%i, 0] [%c4, %c64] [1, 1]
%lhs_sub = tensor.extract_slice %lhs_[%i, 0] [%c4, %c64] [1, 1]
: tensor<24x64xi8> to tensor<?x?xi8>
%rhs_sub = subtensor %rhs_[%i, 0] [%c4, %c64] [1, 1]
%rhs_sub = tensor.extract_slice %rhs_[%i, 0] [%c4, %c64] [1, 1]
: tensor<24x64xi8> to tensor<?x?xi8>
%out_sub = subtensor %out_[%i, 0] [%c4, %c64] [1, 1]
%out_sub = tensor.extract_slice %out_[%i, 0] [%c4, %c64] [1, 1]
: tensor<24x64xi8> to tensor<?x?xi8>
%sum = linalg.generic #trait_4
@ -747,7 +747,7 @@ func @tiled_loop(%lhs: tensor<24x64xi8>, %rhs: tensor<24x64xi8>,
linalg.yield %s : i8
} -> tensor<?x?xi8>
%sum_sub = subtensor_insert %sum into %out_[%i, 0][%c4, %c64][1, 1]
%sum_sub = tensor.insert_slice %sum into %out_[%i, 0][%c4, %c64][1, 1]
: tensor<?x?xi8> into tensor<24x64xi8>
linalg.yield %sum_sub : tensor<24x64xi8>
}
@ -792,13 +792,13 @@ func @tiled_loop_reduction(%input_3d: tensor<16x24x32xf32>,
outs(%o_ = %output: tensor<24xf32>)
iterators["reduction", "parallel", "reduction"]
distribution["block_x", "block_y", "none"] {
%sub_3d = subtensor %i3d_[%i, %j, %k][2, 4, 8][1, 1, 1]
%sub_3d = tensor.extract_slice %i3d_[%i, %j, %k][2, 4, 8][1, 1, 1]
: tensor<16x24x32xf32> to tensor<2x4x8xf32>
%sub_2d = subtensor %i2d_[%i, %k][2, 8][1, 1]
%sub_2d = tensor.extract_slice %i2d_[%i, %k][2, 8][1, 1]
: tensor<16x32xf32> to tensor<2x8xf32>
%sub_1d = subtensor %i1d_[%j] [4] [1]
%sub_1d = tensor.extract_slice %i1d_[%j] [4] [1]
: tensor<24xf32> to tensor<4xf32>
%sub_out = subtensor %o_[%j] [4] [1]
%sub_out = tensor.extract_slice %o_[%j] [4] [1]
: tensor<24xf32> to tensor<4xf32>
%acc = linalg.generic #trait_5
ins(%sub_3d, %sub_2d, %sub_1d
@ -810,7 +810,7 @@ func @tiled_loop_reduction(%input_3d: tensor<16x24x32xf32>,
linalg.yield %1 : f32
} -> tensor<4xf32>
%sum_sub = subtensor_insert %acc into %o_[%j][%c4][1]
%sum_sub = tensor.insert_slice %acc into %o_[%j][%c4][1]
: tensor<4xf32> into tensor<24xf32>
linalg.yield %sum_sub : tensor<24xf32>
}

View File

@ -2,7 +2,7 @@
// CHECK-LABEL: @static_data_only(
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>
// CHECK: %[[RESULT:.*]] = subtensor %[[ARG0]][1, 2] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
// CHECK: %[[RESULT:.*]] = tensor.extract_slice %[[ARG0]][1, 2] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
// CHECK: return %[[RESULT]]
func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32)
-> tensor<2x1xf32> {
@ -10,7 +10,7 @@ func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32)
^bb0(%arg1: index, %arg2: index):
linalg.yield %pad : f32
} : tensor<4x5xf32> to tensor<11x13xf32>
%1 = subtensor %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32>
%1 = tensor.extract_slice %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32>
return %1 : tensor<2x1xf32>
}
@ -19,7 +19,7 @@ func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_high_pad_only
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
// CHECK-NOT: linalg.pad_tensor
// CHECK-NOT: subtensor
// CHECK-NOT: tensor.extract_slice
// CHECK: %[[RESULT:.*]] = tensor.generate
// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<2x4xf32>
@ -29,7 +29,7 @@ func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
^bb0(%arg1: index, %arg2: index):
linalg.yield %pad : f32
} : tensor<4x5xf32> to tensor<11x13xf32>
%1 = subtensor %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32>
%1 = tensor.extract_slice %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32>
return %1 : tensor<2x4xf32>
}
@ -38,7 +38,7 @@ func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_low_pad_only
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
// CHECK-NOT: linalg.pad_tensor
// CHECK-NOT: subtensor
// CHECK-NOT: tensor.extract_slice
// CHECK: %[[RESULT:.*]] = tensor.generate
// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<2x3xf32>
@ -48,7 +48,7 @@ func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
^bb0(%arg1: index, %arg2: index):
linalg.yield %pad : f32
} : tensor<4x5xf32> to tensor<14x20xf32>
%1 = subtensor %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32>
%1 = tensor.extract_slice %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32>
return %1 : tensor<2x3xf32>
}
@ -57,7 +57,7 @@ func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_low_pad_only_2
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
// CHECK-NOT: linalg.pad_tensor
// CHECK-NOT: subtensor
// CHECK-NOT: tensor.extract_slice
// CHECK: %[[RESULT:.*]] = tensor.generate
// CHECK: tensor.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<1x3xf32>
@ -67,7 +67,7 @@ func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
^bb0(%arg1: index, %arg2: index):
linalg.yield %pad : f32
} : tensor<4x5xf32> to tensor<14x20xf32>
%1 = subtensor %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32>
%1 = tensor.extract_slice %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32>
return %1 : tensor<1x3xf32>
}
@ -76,7 +76,7 @@ func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_mixed_data_high_pad
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
// CHECK-NOT: linalg.pad_tensor
// CHECK: %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[1, 3]
// CHECK: linalg.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<3x4xf32>
@ -86,7 +86,7 @@ func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
^bb0(%arg1: index, %arg2: index):
linalg.yield %pad : f32
} : tensor<4x5xf32> to tensor<11x13xf32>
%1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32>
%1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32>
return %1 : tensor<3x4xf32>
}
@ -95,7 +95,7 @@ func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK-LABEL: @static_mixed_data_low_pad
// CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
// CHECK-NOT: linalg.pad_tensor
// CHECK: %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0]
// CHECK: linalg.yield %[[PAD]]
// CHECK: return %[[RESULT]] : tensor<3x4xf32>
@ -105,7 +105,7 @@ func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
^bb0(%arg1: index, %arg2: index):
linalg.yield %pad : f32
} : tensor<4x5xf32> to tensor<14x20xf32>
%1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32>
%1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32>
return %1 : tensor<3x4xf32>
}
@ -123,7 +123,7 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
^bb0(%arg1: index, %arg2: index):
linalg.yield %pad : f32
} : tensor<4x5xf32> to tensor<13x16xf32>
%1 = subtensor %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32>
%1 = tensor.extract_slice %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32>
return %1 : tensor<7x9xf32>
}
@ -138,7 +138,7 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
// CHECK: %[[GEN:.*]] = tensor.generate
// CHECK: scf.yield %[[GEN]]
// CHECK: } else {
// CHECK: %[[SUBTENSOR:.*]] = subtensor %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor<?x5xf32> to tensor<?x1xf32>
// CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor<?x5xf32> to tensor<?x1xf32>
// CHECK: %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
// CHECK: %[[CAST:.*]] = tensor.cast %[[PADTENSOR]] : tensor<?x4xf32> to tensor<3x4xf32>
// CHECK: scf.yield %[[CAST]]
@ -149,7 +149,7 @@ func @dynamic_high_pad(%arg0 : tensor<?x5xf32>, %h1: index, %pad : f32) -> tenso
^bb0(%arg1: index, %arg2: index):
linalg.yield %pad : f32
} : tensor<?x5xf32> to tensor<?x13xf32>
%1 = subtensor %0[2, 4] [3, 4] [1, 1] : tensor<?x13xf32> to tensor<3x4xf32>
%1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<?x13xf32> to tensor<3x4xf32>
return %1 : tensor<3x4xf32>
}

View File

@ -199,12 +199,12 @@ func @matmul_tensors(
// CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xf32>) {
// CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: scf.yield %[[TD]] : tensor<?x?xf32>
// CHECK: scf.yield %[[TD2]] : tensor<?x?xf32>
// CHECK: scf.yield %[[TD1]] : tensor<?x?xf32>

View File

@ -16,11 +16,11 @@ func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tens
%3 = scf.for %arg3 = %c0 to %0 step %c2 iter_args(%arg4 = %arg2) -> (tensor<?x?xf32>) {
%4 = scf.for %arg5 = %c0 to %2 step %c3 iter_args(%arg6 = %arg4) -> (tensor<?x?xf32>) {
%5 = scf.for %arg7 = %c0 to %1 step %c4 iter_args(%arg8 = %arg6) -> (tensor<?x?xf32>) {
%6 = subtensor %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
%7 = subtensor %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
%8 = subtensor %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
%7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
%8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%9 = linalg.matmul ins(%6, %7 : tensor<?x4xf32>, tensor<4x?xf32>) outs(%8 : tensor<?x?xf32>) -> tensor<?x?xf32>
%10 = subtensor_insert %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
%10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
scf.yield %10 : tensor<?x?xf32>
}
scf.yield %5 : tensor<?x?xf32>
@ -48,22 +48,22 @@ func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tens
// CHECK-DAG: %[[dC1:.*]] = memref.dim %[[C]], %[[C1]] : tensor<?x?xf32>
// CHECK: scf.for %[[I:[0-9a-z]*]]
// CHECK: %[[sizeA0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dA0]]]
// CHECK: %[[stA:.*]] = subtensor %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sizeC0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dC0]]]
// CHECK-NEXT: scf.for %[[J:[0-9a-z]*]]
// CHECK-NEXT: scf.for %[[K:[0-9a-z]*]] {{.*}} iter_args(%[[RES:[0-9a-z]*]]
// CHECK-DAG: %[[stB1:.*]] = subtensor %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1] : tensor<?x?xf32> to tensor<4x3xf32>
// CHECK-DAG: %[[stF:.*]] = subtensor %[[RES]][%[[I]], %[[J]]] [2, 3] [1, 1] : tensor<?x?xf32> to tensor<2x3xf32>
// CHECK-DAG: %[[stB1:.*]] = tensor.extract_slice %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1] : tensor<?x?xf32> to tensor<4x3xf32>
// CHECK-DAG: %[[stF:.*]] = tensor.extract_slice %[[RES]][%[[I]], %[[J]]] [2, 3] [1, 1] : tensor<?x?xf32> to tensor<2x3xf32>
//
// subtensors of the producing matmul.
// slices of the producing matmul.
// CHECK: %[[sizeB1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dB1]]]
// CHECK: %[[stB2:.*]] = subtensor %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sizeC1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dC1]]]
// CHECK: %[[stC:.*]] = subtensor %[[C]][%[[I]], %[[K]]] [%[[sizeC0]], %[[sizeC1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [%[[sizeC0]], %[[sizeC1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[stC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: %[[CAST:.*]] = tensor.cast %[[stD]] : tensor<?x?xf32> to tensor<?x4xf32>
// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[CAST]], %[[stB1]] : tensor<?x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
// CHECK-NEXT: subtensor_insert %[[stG]] into %[[RES]][%[[I]], %[[J]]]
// CHECK-NEXT: tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]]
// -----
@ -87,9 +87,9 @@ func @conv_tensors_static(%input: tensor<1x225x225x3xf32>, %filter: tensor<3x3x3
%for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> {
%for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> {
%for2 = scf.for %iv2 = %c0 to %c32 step %c4 iter_args(%arg2 = %arg1) -> tensor<1x112x112x32xf32> {
%0 = subtensor %conv[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
%1 = subtensor %elementwise[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
%2 = subtensor %arg2[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
%0 = tensor.extract_slice %conv[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
%1 = tensor.extract_slice %elementwise[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
%2 = tensor.extract_slice %arg2[0, %iv0, %iv1, %iv2][1, 8, 16, 4][1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
%add = linalg.generic
{
indexing_maps = [
@ -104,7 +104,7 @@ func @conv_tensors_static(%input: tensor<1x225x225x3xf32>, %filter: tensor<3x3x3
linalg.yield %result : f32
} -> tensor<1x8x16x4xf32>
%insert = subtensor_insert %add into %arg2[0, %iv0, %iv1, %iv2] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x8x16x4xf32> into tensor<1x112x112x32xf32>
%insert = tensor.insert_slice %add into %arg2[0, %iv0, %iv1, %iv2] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x8x16x4xf32> into tensor<1x112x112x32xf32>
scf.yield %insert : tensor<1x112x112x32xf32>
}
scf.yield %for2 : tensor<1x112x112x32xf32>
@ -127,19 +127,19 @@ func @conv_tensors_static(%input: tensor<1x225x225x3xf32>, %filter: tensor<3x3x3
// CHECK-NEXT: %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
// CHECK-NEXT: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG1:.+]] = %[[ARG0]])
// CHECK-NEXT: %[[OFFSET_W:.+]] = affine.apply #[[MAP0]](%[[IV1]])
// CHECK-NEXT: %[[ST_INPUT:.+]] = subtensor %arg0[0, %[[OFFSET_H]], %[[OFFSET_W]], 0] [1, 17, 33, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x17x33x3xf32>
// CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %arg0[0, %[[OFFSET_H]], %[[OFFSET_W]], 0] [1, 17, 33, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x17x33x3xf32>
// CHECK-NEXT: scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG2:.+]] = %[[ARG1]])
// CHECK-NEXT: %[[ST_ELEM:.+]] = subtensor %[[ELEM]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
// CHECK-NEXT: %[[ST_ARG2:.+]] = subtensor %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
// CHECK-NEXT: %[[ST_FILTER:.+]] = subtensor %[[FILTER]][0, 0, 0, %[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32>
// CHECK-NEXT: %[[ST_FILL:.+]] = subtensor %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
// CHECK-NEXT: %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
// CHECK-NEXT: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
// CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV2]]] [3, 3, 3, 4] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x4xf32>
// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf
// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>)
// CHECK-SAME: outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
// CHECK-NEXT: %[[ADD:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>)
// CHECK-SAME: outs(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
// CHECK: subtensor_insert %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4]
// CHECK: tensor.insert_slice %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4]
// -----
@ -174,9 +174,9 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
%oh_size = affine.min affine_map<(d0)[s0] -> (16, -d0 + s0)>(%iv1)[%oh]
%ow_size = affine.min affine_map<(d0)[s0] -> (4, -d0 + s0)>(%iv2)[%ow]
%oc_size = affine.min affine_map<(d0)[s0] -> (2, -d0 + s0)>(%iv2)[%oc]
%0 = subtensor %conv[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
%1 = subtensor %elementwise[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
%2 = subtensor %arg3[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
%0 = tensor.extract_slice %conv[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
%1 = tensor.extract_slice %elementwise[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
%2 = tensor.extract_slice %arg3[%iv0, %iv1, %iv2, %iv3][%n_size, %oh_size, %ow_size, %oc_size][1, 1, 1, 1] : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
%add = linalg.generic
{
indexing_maps = [
@ -191,7 +191,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
linalg.yield %result : f32
} -> tensor<?x?x?x?xf32>
%insert = subtensor_insert %add into %arg3[%iv0, %iv1, %iv2, %iv3] [%n_size, %oh_size, %ow_size, %oc_size] [1, 1, 1, 1] : tensor<?x?x?x?xf32> into tensor<?x?x?x?xf32>
%insert = tensor.insert_slice %add into %arg3[%iv0, %iv1, %iv2, %iv3] [%n_size, %oh_size, %ow_size, %oc_size] [1, 1, 1, 1] : tensor<?x?x?x?xf32> into tensor<?x?x?x?xf32>
scf.yield %insert : tensor<?x?x?x?xf32>
}
scf.yield %for3 : tensor<?x?x?x?xf32>
@ -257,19 +257,19 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK-NEXT: %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]]
// CHECK-NEXT: %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]])
// CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OW]], %[[IV2]])[%[[FILTER_W]], %[[INPUT_W]]]
// CHECK-NEXT: %[[ST_INPUT:.+]] = subtensor %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0]
// CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0]
// CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]]
// CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]]
// CHECK-NEXT: scf.for %[[IV3:.+]] = %{{.+}} to %[[ELEM_OC]] step %{{.+}} iter_args(%[[ARG:[a-z0-9]+]]
// CHECK-NEXT: %[[ST_ELEM:.+]] = subtensor %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK-NEXT: %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
// CHECK-NEXT: %[[ST_ARG:.+]] = subtensor %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK-NEXT: %[[ST_ARG:.+]] = tensor.extract_slice %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
// CHECK-NEXT: %[[SIZE_ELEM_OC_2:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILTER_OC]], %[[ELEM_OC]]]
// CHECK-NEXT: %[[ST_FILTER:.+]] = subtensor %[[FILTER]][0, 0, 0, %[[IV3]]]
// CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV3]]]
// CHECK-SAME: [%[[FILTER_H]], %[[FILTER_W]], %[[FILTER_IC]], %[[SIZE_ELEM_OC_2]]]
// CHECK-NEXT: %[[SIZE_ELEM_OC_3:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILL_C]], %[[ELEM_OC]]]
// CHECK-NEXT: %[[ST_FILL:.+]] = subtensor %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK-SAME: [%[[SIZE_ELEM_N_2]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_3]]]
// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_input_nhwc_filter_hwcf
// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
@ -277,5 +277,5 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK-NEXT: %[[ST_ADD:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
// CHECK-SAME: outs(%[[ST_ARG]] : tensor<?x?x?x?xf32>)
// CHECK: subtensor_insert %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK: tensor.insert_slice %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]

View File

@ -12,9 +12,9 @@ func @matmul_tensors(
// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xi32>) {
// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xi32>) {
// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xi32>) {
// CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x?xi8> to tensor<?x?xi8>
// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xi8> to tensor<?x?xi8>
// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xi32> to tensor<?x?xi32>
// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xi8> to tensor<?x?xi8>
// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xi8> to tensor<?x?xi8>
// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xi32> to tensor<?x?xi32>
// Dynamic op has been canonicalized away.
// CHECK-NOT: linalg.matmul {{.*}} tensor<?x?xi8>
@ -28,8 +28,8 @@ func @matmul_tensors(
// CHECK: : tensor<?x?xi32> to tensor<2x3xi32>
// CHECK: %[[pD:.*]] = linalg.matmul_i8_i8_i32 ins(%[[pA]], %[[pB]] : tensor<2x4xi8>, tensor<4x3xi8>)
// CHECK-SAME: outs(%[[pC]] : tensor<2x3xi32>) -> tensor<2x3xi32>
// CHECK: %[[sTD:.*]] = subtensor %[[pD]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<2x3xi32> to tensor<?x?xi32>
// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xi32> into tensor<?x?xi32>
// CHECK: %[[sTD:.*]] = tensor.extract_slice %[[pD]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<2x3xi32> to tensor<?x?xi32>
// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xi32> into tensor<?x?xi32>
// CHECK: scf.yield %[[TD]] : tensor<?x?xi32>
// CHECK: scf.yield %[[TD2]] : tensor<?x?xi32>
// CHECK: scf.yield %[[TD1]] : tensor<?x?xi32>
@ -52,15 +52,15 @@ func @generic_scalar_and_tensor(
// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?x?xf32>) {
// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?x?xf32>) {
// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?x?xf32>) {
// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
// Padding injects static information.
// CHECK: %[[pC:.*]] = linalg.pad_tensor %[[sTC]] low[%[[C0]], %[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}, %{{.*}}]
// CHECK: : tensor<?x?x?xf32> to tensor<2x3x4xf32>
// CHECK: %[[pD:.*]] = linalg.generic
// CHECK-SAME: ins(%[[VAL]] : f32) outs(%[[pC]] : tensor<2x3x4xf32>)
// CHECK: %[[sTD:.*]] = subtensor %[[pD]][0, 0, 0] [%{{.*}}, %{{.*}}, %{{.*}}] [1, 1, 1] : tensor<2x3x4xf32> to tensor<?x?x?xf32>
// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
// CHECK: %[[sTD:.*]] = tensor.extract_slice %[[pD]][0, 0, 0] [%{{.*}}, %{{.*}}, %{{.*}}] [1, 1, 1] : tensor<2x3x4xf32> to tensor<?x?x?xf32>
// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
// CHECK: scf.yield %[[TD]] : tensor<?x?x?xf32>
// CHECK: scf.yield %[[TD2]] : tensor<?x?x?xf32>
// CHECK: scf.yield %[[TD1]] : tensor<?x?x?xf32>
@ -104,11 +104,11 @@ func @matmul_partially_padded_tensors(
// CHECK-1DIM-TILE: %[[C0:.*]] = constant 0 : index
// CHECK-1DIM-TILE: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xi32>) {
// CHECK-1DIM-TILE: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xi32>) {
// CHECK-1DIM-TILE: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x8xi8> to tensor<?x8xi8>
// CHECK-1DIM-TILE: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x8xi8> to tensor<?x8xi8>
// CHECK-1DIM-TILE: %[[sTAc:.*]] = tensor.cast %[[sTA]] : tensor<?x8xi8> to tensor<?x?xi8>
// CHECK-1DIM-TILE: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<8x?xi8> to tensor<8x?xi8>
// CHECK-1DIM-TILE: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<8x?xi8> to tensor<8x?xi8>
// CHECK-1DIM-TILE: %[[sTBc:.*]] = tensor.cast %[[sTB]] : tensor<8x?xi8> to tensor<?x?xi8>
// CHECK-1DIM-TILE: %[[sTC:.*]] = subtensor %[[TC1]][{{.*}}] : tensor<?x?xi32> to tensor<?x?xi32>
// CHECK-1DIM-TILE: %[[sTC:.*]] = tensor.extract_slice %[[TC1]][{{.*}}] : tensor<?x?xi32> to tensor<?x?xi32>
// CHECK-1DIM-TILE: %[[pA:.*]] = linalg.pad_tensor %[[sTAc]] low[%[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}]
// CHECK-1DIM-TILE: : tensor<?x?xi8> to tensor<2x8xi8>
// CHECK-1DIM-TILE: %[[pB:.*]] = linalg.pad_tensor %[[sTBc]] low[%[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}]

View File

@ -11,12 +11,12 @@ func @matmul_tensors(
// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xf32>) {
// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xf32>) {
// CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: scf.yield %[[TD]] : tensor<?x?xf32>
// CHECK: scf.yield %[[TD2]] : tensor<?x?xf32>
// CHECK: scf.yield %[[TD1]] : tensor<?x?xf32>
@ -51,14 +51,14 @@ func @matmul_tensors(
// TLOOP-SAME: iterators["parallel", "parallel", "reduction"]
// TLOOP-SAME: distribution["block_x", "block_y", "none"] {
// TLOOP: %[[SUB_ARG_0:.*]] = subtensor %[[A0]][%[[I]], %[[K]]]
// TLOOP: %[[SUB_ARG_1:.*]] = subtensor %[[A1]][%[[K]], %[[J]]]
// TLOOP: %[[SUB_ARG_2:.*]] = subtensor %[[A2]][%[[I]], %[[J]]]
// TLOOP: %[[SUB_ARG_0:.*]] = tensor.extract_slice %[[A0]][%[[I]], %[[K]]]
// TLOOP: %[[SUB_ARG_1:.*]] = tensor.extract_slice %[[A1]][%[[K]], %[[J]]]
// TLOOP: %[[SUB_ARG_2:.*]] = tensor.extract_slice %[[A2]][%[[I]], %[[J]]]
// TLOOP: %[[PROD:.*]] = linalg.matmul ins(%[[SUB_ARG_0]], %[[SUB_ARG_1]]
// TLOOP-SAME: outs(%[[SUB_ARG_2]] : [[TY]]) -> [[TY]]
// TLOOP: %[[O:.*]] = subtensor_insert %[[PROD]] into %[[A2]][%[[I]], %[[J]]]
// TLOOP: %[[O:.*]] = tensor.insert_slice %[[PROD]] into %[[A2]][%[[I]], %[[J]]]
// TLOOP: linalg.yield %[[O]] : [[TY]]
// -----
@ -93,13 +93,13 @@ func @generic_op_tensors(
// CHECK: %[[TD0:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC0:.+]] = %[[INIT]]) -> (tensor<?x?x?xf32>) {
// CHECK: %[[TD1:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC1:.+]] = %[[TC0]]) -> (tensor<?x?x?xf32>) {
// CHECK: %[[TD2:.+]] = scf.for %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[TC2:.+]] = %[[TC1]]) -> (tensor<?x?x?xf32>) {
// CHECK: %[[STARG0:.+]] = subtensor %[[ARG0]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
// CHECK: %[[STARG1:.+]] = subtensor %[[ARG1]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
// CHECK: %[[STARG2:.+]] = subtensor %[[TC2]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
// CHECK: %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
// CHECK: %[[STARG1:.+]] = tensor.extract_slice %[[ARG1]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
// CHECK: %[[STARG2:.+]] = tensor.extract_slice %[[TC2]][{{.+}}] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
// CHECK: %[[STRETURN:.+]] = linalg.generic
// CHECK-SAME: ins(%[[STARG0]], %[[STARG1]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>)
// CHECK-SAME: outs(%[[STARG2]] : tensor<?x?x?xf32>)
// CHECK: %[[TD:.+]] = subtensor_insert %[[STRETURN]] into %[[TC2]]
// CHECK: %[[TD:.+]] = tensor.insert_slice %[[STRETURN]] into %[[TC2]]
// CHECK: scf.yield %[[TD]]
// CHECK: }
// CHECK: scf.yield %[[TD2]]

View File

@ -586,7 +586,7 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6
// CHECK: %[[INIT:.*]] = linalg.init_tensor [6, %[[V1]], %[[V2]], %[[V5]]] : tensor<6x?x?x?xf32>
// CHECK: %[[FILL:.*]] = linalg.fill(%[[INIT]], %{{.*}}) : tensor<6x?x?x?xf32>, f32 -> tensor<6x?x?x?xf32>
// CHECK: %[[SRCDIM:.*]] = memref.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[RESULT:.*]] = subtensor_insert %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
// CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
// CHECK: return %[[RESULT]]
func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
%pad_value: f32) -> tensor<6x?x?x?xf32> {
@ -638,7 +638,7 @@ func @pad_and_transfer_write_static(
} : tensor<5x6xf32> to tensor<10x13xf32>
%1 = vector.transfer_write %arg1, %0[%c0, %c0]
: vector<7x9xf32>, tensor<10x13xf32>
%2 = subtensor %1[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32>
%2 = tensor.extract_slice %1[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32>
return %2 : tensor<5x6xf32>
}
@ -648,14 +648,14 @@ func @pad_and_transfer_write_static(
// CHECK-SAME: %[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: vector<7x9xf32>, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index
// CHECK-NOT: linalg.pad_tensor
// CHECK: %[[C0:.*]] = constant 0 : index
// CHECK: %[[SUB:.*]] = subtensor %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
// CHECK: %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[ARG1]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<?x6xf32>
// CHECK: return %[[RESULT]]
func @pad_and_transfer_write_dynamic_static(
%arg0: tensor<?x?xf32>, %arg1: vector<7x9xf32>, %size: index, %padding: index) -> tensor<?x6xf32> {
%c0 = constant 0 : index
%c5 = constant 5.0 : f32
%s = subtensor %arg0[0, 0] [%size, 6] [1, 1]
%s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1]
: tensor<?x?xf32> to tensor<?x6xf32>
%0 = linalg.pad_tensor %s low[0, 0] high[%padding, 7] {
^bb0(%arg2: index, %arg3: index):
@ -663,13 +663,13 @@ func @pad_and_transfer_write_dynamic_static(
} : tensor<?x6xf32> to tensor<?x13xf32>
%1 = vector.transfer_write %arg1, %0[%c0, %c0]
: vector<7x9xf32>, tensor<?x13xf32>
%2 = subtensor %1[0, 0] [%size, 6] [1, 1] : tensor<?x13xf32> to tensor<?x6xf32>
%2 = tensor.extract_slice %1[0, 0] [%size, 6] [1, 1] : tensor<?x13xf32> to tensor<?x6xf32>
return %2 : tensor<?x6xf32>
}
// -----
// CHECK-LABEL: func @pad_and_subtensor_insert
// CHECK-LABEL: func @pad_and_insert_slice
// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>, %[[ARG1:.*]]: tensor<12x13xf32>
// CHECK-NOT: linalg.pad_tensor
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
@ -677,7 +677,7 @@ func @pad_and_transfer_write_dynamic_static(
// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32>
// CHECK: return %[[WRITE]]
func @pad_and_subtensor_insert(
func @pad_and_insert_slice(
%arg0: tensor<5x6xf32>, %arg1: tensor<12x13xf32>) -> tensor<12x13xf32> {
%c0 = constant 0 : index
%c5 = constant 5.0 : f32
@ -685,7 +685,7 @@ func @pad_and_subtensor_insert(
^bb0(%arg2: index, %arg3: index):
linalg.yield %c5 : f32
} : tensor<5x6xf32> to tensor<7x9xf32>
%r = subtensor_insert %0 into %arg1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32>
%r = tensor.insert_slice %0 into %arg1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32>
return %r : tensor<12x13xf32>
}

View File

@ -367,27 +367,3 @@ func @tensor_cast_to_memref(%arg0 : tensor<4x6x16x32xi8>) ->
%1 = memref.buffer_cast %0 : memref<?x?x16x32xi8>
return %1 : memref<?x?x16x32xi8>
}
// -----
// TODO: Move this test to Tensor/canonicalize.mlir.
func @subtensor_insert_propagate_dest_cast(%arg0 : tensor<2x?xi32>, %arg1 : tensor<i32>,
%arg2 : index, %arg3 : index) -> tensor<?x?xi32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
%c8 = constant 8 : index
%0 = memref.dim %arg0, %c1 : tensor<2x?xi32>
%1 = tensor.extract %arg1[] : tensor<i32>
%2 = tensor.generate %arg2, %c8 {
^bb0(%arg4: index, %arg5: index):
tensor.yield %1 : i32
} : tensor<?x?xi32>
%3 = subtensor_insert %arg0 into %2[%c0, %arg3] [%c2, %0] [%c1, %c1] : tensor<2x?xi32> into tensor<?x?xi32>
return %3 : tensor<?x?xi32>
}
// CHECK-LABEL: func @subtensor_insert_propagate_dest_cast
// CHECK: %[[UPDATED:.+]] = subtensor_insert %{{.+}} into %{{.+}}[0, %{{.+}}] [2, %{{.+}}] [1, 1]
// CHECK-SAME: tensor<2x?xi32> into tensor<?x8xi32>
// CHECK: %[[CAST:.+]] = tensor.cast %[[UPDATED]]
// CHECK: return %[[CAST]]

View File

@ -659,10 +659,10 @@ func @matmul_on_tensors(%t0: tensor<32x1024xf32>, %t1: tensor<1024x1024xf32>) ->
scf.yield %2 : tensor<?x?xf32>
}
// CHECK-NOT: tensor.cast
// CHECK: %[[RES:.*]] = subtensor_insert %[[FOR_RES]] into %[[T1]][0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32>
// CHECK: %[[RES:.*]] = tensor.insert_slice %[[FOR_RES]] into %[[T1]][0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32>
// CHECK: return %[[RES]] : tensor<1024x1024xf32>
%2 = tensor.cast %1 : tensor<?x?xf32> to tensor<32x1024xf32>
%res = subtensor_insert %2 into %t1[0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32>
%res = tensor.insert_slice %2 into %t1[0, 0] [32, 1024] [1, 1] : tensor<32x1024xf32> into tensor<1024x1024xf32>
return %res : tensor<1024x1024xf32>
}

View File

@ -24,202 +24,6 @@ func @cmpi_equal_operands(%arg0: i64)
// -----
func @subtensor_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
return %0 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @subtensor_canonicalize
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
// CHECK: %[[SUBTENSOR:.+]] = subtensor %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1]
// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> to tensor<4x1x?xf32>
// CHECK: %[[RESULT:.+]] = tensor.cast %[[SUBTENSOR]]
// CHEKC: return %[[RESULT]]
// -----
func @rank_reducing_subtensor_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index) -> tensor<?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
// CHECK-LABEL: func @rank_reducing_subtensor_canonicalize
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
// CHECK: %[[SUBTENSOR:.+]] = subtensor %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1]
// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> to tensor<4x?xf32>
// CHECK: %[[RESULT:.+]] = tensor.cast %[[SUBTENSOR]]
// CHEKC: return %[[RESULT]]
// -----
// CHECK-LABEL: func @trivial_subtensor
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
// CHECK-NOT: subtensor
// CHECK: return %[[ARG0]] : tensor<4x6x16x32xi8>
func @trivial_subtensor(%arg0 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
%0 = subtensor %arg0[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> to tensor<4x6x16x32xi8>
return %0 : tensor<4x6x16x32xi8>
}
// -----
// CHECK-LABEL: func @trivial_subtensor_insert
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
// CHECK-NOT: subtensor
// CHECK: return %[[ARG0]] : tensor<4x6x16x32xi8>
func @trivial_subtensor_insert(%arg0 : tensor<4x6x16x32xi8>, %arg1 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
%0 = subtensor_insert %arg0 into %arg1[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> into tensor<4x6x16x32xi8>
return %0 : tensor<4x6x16x32xi8>
}
// -----
// CHECK-LABEL: func @rank_reducing_tensor_of_cast
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
// CHECK: %[[S:.+]] = subtensor %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<4x6x16x32xi8> to tensor<16x32xi8>
// Tensor cast is moved after subtensor and then gets canonicalized away.
// CHECK-NOT: tensor.cast
// CHECK: return %[[S]] : tensor<16x32xi8>
func @rank_reducing_tensor_of_cast(%arg : tensor<4x6x16x32xi8>) -> tensor<16x32xi8> {
%0 = tensor.cast %arg : tensor<4x6x16x32xi8> to tensor<?x?x16x32xi8>
%1 = subtensor %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<?x?x16x32xi8> to tensor<16x32xi8>
return %1 : tensor<16x32xi8>
}
// -----
// CHECK-LABEL: func @rank_reducing_subtensor_insert_of_cast
// CHECK-SAME: %[[A:.[a-z0-9A-Z_]+]]: tensor<16x32xi8>
// CHECK-SAME: %[[B:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
// CHECK: %[[S:.+]] = subtensor_insert %[[A]] into %[[B]][0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<16x32xi8> into tensor<4x6x16x32xi8>
// Tensor cast is folded away.
// CHECK-NOT: tensor.cast
// CHECK: return %[[S]] : tensor<4x6x16x32xi8>
func @rank_reducing_subtensor_insert_of_cast(%a : tensor<16x32xi8>, %b : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
%cast = tensor.cast %a : tensor<16x32xi8> to tensor<?x32xi8>
%res = subtensor_insert %cast into %b[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<?x32xi8> into tensor<4x6x16x32xi8>
return %res : tensor<4x6x16x32xi8>
}
// -----
func @subtensor_insert_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = subtensor_insert %arg0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
return %0 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @subtensor_insert_canonicalize
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK: %[[RESULT:.+]] = subtensor_insert %[[ARG0]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> into tensor<?x?x?xf32>
// CHEKC: return %[[RESULT]]
// -----
func @subtensor_to_subtensor_insert_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
%1 = subtensor_insert %0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
return %1 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @subtensor_to_subtensor_insert_canonicalize
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK: %[[SUBTENSOR:.+]] = subtensor %[[ARG0]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}} [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> to tensor<4x1x?xf32>
// CHECK: %[[RESULT:.+]] = subtensor_insert %[[SUBTENSOR]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<4x1x?xf32> into tensor<?x?x?xf32>
// CHEKC: return %[[RESULT]]
// -----
func @rank_reducing_subtensor_insert_canonicalize(%arg0 : tensor<?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = subtensor_insert %arg0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?xf32> into tensor<?x?x?xf32>
return %0 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @rank_reducing_subtensor_insert_canonicalize
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>
// CHECK: %[[RESULT:.+]] = subtensor_insert %[[ARG0]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?xf32> into tensor<?x?x?xf32>
// CHEKC: return %[[RESULT]]
// -----
func @rank_reducing_subtensor_to_subtensor_insert_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = subtensor %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?xf32>
%1 = subtensor_insert %0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?xf32> into tensor<?x?x?xf32>
return %1 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @rank_reducing_subtensor_to_subtensor_insert_canonicalize
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK: %[[SUBTENSOR:.+]] = subtensor %[[ARG0]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> to tensor<4x?xf32>
// CHECK: %[[RESULT:.+]] = subtensor_insert %[[SUBTENSOR]] into %[[ARG3]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<4x?xf32> into tensor<?x?x?xf32>
// CHEKC: return %[[RESULT]]
// -----
func @subtensor_insert_output_dest_canonicalize(%arg0 : tensor<2x3xi32>, %arg1 : tensor<i32>) -> tensor<3x9xi32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
%c9 = constant 9 : index
%c3 = constant 3 : index
%2 = tensor.extract %arg1[] : tensor<i32>
%4 = tensor.generate %c3, %c9 {
^bb0(%arg2: index, %arg3: index):
tensor.yield %2 : i32
} : tensor<?x?xi32>
%5 = subtensor_insert %arg0 into %4[%c0, %c1] [%c2, %c3] [1, 1] : tensor<2x3xi32> into tensor<?x?xi32>
%6 = tensor.cast %5 : tensor<?x?xi32> to tensor<3x9xi32>
return %6 : tensor<3x9xi32>
}
// CHECK-LABEL: func @subtensor_insert_output_dest_canonicalize
// CHECK-SAME: %[[ARG0:[a-zA-z0-9_]+]]: tensor<2x3xi32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<i32>
// CHECK: %[[PAD:.+]] = tensor.extract %[[ARG1]]
// CHECK: %[[GENERATE:.+]] = tensor.generate
// CHECK: %[[RESULT:.+]] = subtensor_insert %[[ARG0]] into %[[GENERATE]]
// CHECK: return %[[RESULT]]
// -----
// CHECK-LABEL: @select_same_val
// CHECK: return %arg1
func @select_same_val(%arg0: i1, %arg1: i64) -> i64 {

View File

@ -263,3 +263,222 @@ func @from_elements.constant() -> tensor<3xindex> {
%tensor = tensor.from_elements %c1, %c2, %c1 : tensor<3xindex>
return %tensor : tensor<3xindex>
}
// -----
func @slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
return %0 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @slice_canonicalize
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1]
// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> to tensor<4x1x?xf32>
// CHECK: %[[RESULT:.+]] = tensor.cast %[[SLICE]]
// CHECK: return %[[RESULT]]
// -----
func @rank_reducing_slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index) -> tensor<?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
// CHECK-LABEL: func @rank_reducing_slice_canonicalize
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]][0, %{{[a-zA-Z0-9_]+}}, 1]
// CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> to tensor<4x?xf32>
// CHECK: %[[RESULT:.+]] = tensor.cast %[[SLICE]]
// CHECK: return %[[RESULT]]
// -----
// CHECK-LABEL: func @trivial_slice
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
// CHECK-NOT: tensor.extract_slice
// CHECK: return %[[ARG0]] : tensor<4x6x16x32xi8>
func @trivial_slice(%arg0 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
%0 = tensor.extract_slice %arg0[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> to tensor<4x6x16x32xi8>
return %0 : tensor<4x6x16x32xi8>
}
// -----
// CHECK-LABEL: func @trivial_insert_slice
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
// CHECK-NOT: tensor.insert_slice
// CHECK: return %[[ARG0]] : tensor<4x6x16x32xi8>
func @trivial_insert_slice(%arg0 : tensor<4x6x16x32xi8>, %arg1 : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
%0 = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0] [4, 6, 16, 32] [1, 1, 1, 1] : tensor<4x6x16x32xi8> into tensor<4x6x16x32xi8>
return %0 : tensor<4x6x16x32xi8>
}
// -----
// CHECK-LABEL: func @rank_reducing_tensor_of_cast
// CHECK-SAME: %[[ARG0:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
// CHECK: %[[S:.+]] = tensor.extract_slice %arg0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<4x6x16x32xi8> to tensor<16x32xi8>
// Tensor cast is moved after slice and then gets canonicalized away.
// CHECK-NOT: tensor.cast
// CHECK: return %[[S]] : tensor<16x32xi8>
func @rank_reducing_tensor_of_cast(%arg : tensor<4x6x16x32xi8>) -> tensor<16x32xi8> {
%0 = tensor.cast %arg : tensor<4x6x16x32xi8> to tensor<?x?x16x32xi8>
%1 = tensor.extract_slice %0[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<?x?x16x32xi8> to tensor<16x32xi8>
return %1 : tensor<16x32xi8>
}
// -----
// CHECK-LABEL: func @rank_reducing_insert_slice_of_cast
// CHECK-SAME: %[[A:.[a-z0-9A-Z_]+]]: tensor<16x32xi8>
// CHECK-SAME: %[[B:.[a-z0-9A-Z_]+]]: tensor<4x6x16x32xi8>
// CHECK: %[[S:.+]] = tensor.insert_slice %[[A]] into %[[B]][0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<16x32xi8> into tensor<4x6x16x32xi8>
// Tensor cast is folded away.
// CHECK-NOT: tensor.cast
// CHECK: return %[[S]] : tensor<4x6x16x32xi8>
func @rank_reducing_insert_slice_of_cast(%a : tensor<16x32xi8>, %b : tensor<4x6x16x32xi8>) -> tensor<4x6x16x32xi8> {
%cast = tensor.cast %a : tensor<16x32xi8> to tensor<?x32xi8>
%res = tensor.insert_slice %cast into %b[0, 1, 0] [1, 1, 16] [1, 1, 1] : tensor<?x32xi8> into tensor<4x6x16x32xi8>
return %res : tensor<4x6x16x32xi8>
}
// -----
func @insert_slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = tensor.insert_slice %arg0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
return %0 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @insert_slice_canonicalize
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> into tensor<?x?x?xf32>
// CHECK: return %[[RESULT]]
// -----
func @slice_to_insert_slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?x?xf32>
%1 = tensor.insert_slice %0 into %arg3[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> into tensor<?x?x?xf32>
return %1 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @slice_to_insert_slice_canonicalize
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> to tensor<4x1x?xf32>
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SLICE]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<4x1x?xf32> into tensor<?x?x?xf32>
// CHECK: return %[[RESULT]]
// -----
func @rank_reducing_insert_slice_canonicalize(%arg0 : tensor<?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = tensor.insert_slice %arg0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?xf32> into tensor<?x?x?xf32>
return %0 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @rank_reducing_insert_slice_canonicalize
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?xf32> into tensor<?x?x?xf32>
// CHECK: return %[[RESULT]]
// -----
func @rank_reducing_slice_to_insert_slice_canonicalize(%arg0 : tensor<?x?x?xf32>, %arg1 : index,
%arg2 : index, %arg3 : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
%c1 = constant 1 : index
%c4 = constant 4 : index
%0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?x?xf32> to tensor<?x?xf32>
%1 = tensor.insert_slice %0 into %arg3[%c0, %arg1, %c1] [%c4, 1, %arg2] [%c1, %c1, %c1] : tensor<?x?xf32> into tensor<?x?x?xf32>
return %1 : tensor<?x?x?xf32>
}
// CHECK-LABEL: func @rank_reducing_slice_to_insert_slice_canonicalize
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<?x?x?xf32> to tensor<4x?xf32>
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SLICE]] into %[[ARG3]]
// CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1]
// CHECK-SAME: : tensor<4x?xf32> into tensor<?x?x?xf32>
// CHECK: return %[[RESULT]]
// -----
func @insert_slice_propagate_dest_cast(%arg0 : tensor<2x?xi32>, %arg1 : tensor<i32>,
%arg2 : index, %arg3 : index) -> tensor<?x?xi32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
%c8 = constant 8 : index
%0 = memref.dim %arg0, %c1 : tensor<2x?xi32>
%1 = tensor.extract %arg1[] : tensor<i32>
%2 = tensor.generate %arg2, %c8 {
^bb0(%arg4: index, %arg5: index):
tensor.yield %1 : i32
} : tensor<?x?xi32>
%3 = tensor.insert_slice %arg0 into %2[%c0, %arg3] [%c2, %0] [%c1, %c1] : tensor<2x?xi32> into tensor<?x?xi32>
return %3 : tensor<?x?xi32>
}
// CHECK-LABEL: func @insert_slice_propagate_dest_cast
// CHECK: %[[UPDATED:.+]] = tensor.insert_slice %{{.+}} into %{{.+}}[0, %{{.+}}] [2, %{{.+}}] [1, 1]
// CHECK-SAME: tensor<2x?xi32> into tensor<?x8xi32>
// CHECK: %[[CAST:.+]] = tensor.cast %[[UPDATED]]
// CHECK: return %[[CAST]]
// -----
func @insert_slice_output_dest_canonicalize(%arg0 : tensor<2x3xi32>, %arg1 : tensor<i32>) -> tensor<3x9xi32> {
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
%c9 = constant 9 : index
%c3 = constant 3 : index
%2 = tensor.extract %arg1[] : tensor<i32>
%4 = tensor.generate %c3, %c9 {
^bb0(%arg2: index, %arg3: index):
tensor.yield %2 : i32
} : tensor<?x?xi32>
%5 = tensor.insert_slice %arg0 into %4[%c0, %c1] [%c2, %c3] [1, 1] : tensor<2x3xi32> into tensor<?x?xi32>
%6 = tensor.cast %5 : tensor<?x?xi32> to tensor<3x9xi32>
return %6 : tensor<3x9xi32>
}
// CHECK-LABEL: func @insert_slice_output_dest_canonicalize
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<2x3xi32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<i32>
// CHECK: %[[PAD:.+]] = tensor.extract %[[ARG1]]
// CHECK: %[[GENERATE:.+]] = tensor.generate
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[ARG0]] into %[[GENERATE]]
// CHECK: return %[[RESULT]]

View File

@ -825,31 +825,31 @@ func @assume_alignment(%0: memref<4x4xf16>) {
return
}
// CHECK-LABEL: func @subtensor({{.*}}) {
func @subtensor(%t: tensor<8x16x4xf32>, %idx : index) {
// CHECK-LABEL: func @slice({{.*}}) {
func @slice(%t: tensor<8x16x4xf32>, %idx : index) {
%c0 = constant 0 : index
%c1 = constant 1 : index
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: tensor<8x16x4xf32> to tensor<?x?x?xf32>
%1 = subtensor %t[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1]
%1 = tensor.extract_slice %t[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1]
: tensor<8x16x4xf32> to tensor<?x?x?xf32>
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: tensor<8x16x4xf32> to tensor<4x4x4xf32>
%2 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1]
%2 = tensor.extract_slice %t[0, 2, 0][4, 4, 4][1, 1, 1]
: tensor<8x16x4xf32> to tensor<4x4x4xf32>
// CHECK: subtensor
// CHECK: tensor.extract_slice
// CHECK-SAME: tensor<8x16x4xf32> to tensor<4x4xf32>
%3 = subtensor %t[0, 2, 0][4, 1, 4][1, 1, 1]
%3 = tensor.extract_slice %t[0, 2, 0][4, 1, 4][1, 1, 1]
: tensor<8x16x4xf32> to tensor<4x4xf32>
return
}
// CHECK-LABEL: func @subtensor_insert({{.*}}) {
func @subtensor_insert(
// CHECK-LABEL: func @insert_slice({{.*}}) {
func @insert_slice(
%t: tensor<8x16x4xf32>,
%t2: tensor<16x32x8xf32>,
%t3: tensor<4x4xf32>,
@ -857,19 +857,19 @@ func @subtensor_insert(
%c0 = constant 0 : index
%c1 = constant 1 : index
// CHECK: subtensor_insert
// CHECK: tensor.insert_slice
// CHECK-SAME: tensor<8x16x4xf32> into tensor<16x32x8xf32>
%1 = subtensor_insert %t into %t2[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1]
%1 = tensor.insert_slice %t into %t2[%c0, %c0, %c0][%idx, %idx, %idx][%c1, %c1, %c1]
: tensor<8x16x4xf32> into tensor<16x32x8xf32>
// CHECK: subtensor_insert
// CHECK: tensor.insert_slice
// CHECK-SAME: tensor<8x16x4xf32> into tensor<16x32x8xf32>
%2 = subtensor_insert %t into %t2[%c0, %idx, %c0][%idx, 4, %idx][%c1, 1, %c1]
%2 = tensor.insert_slice %t into %t2[%c0, %idx, %c0][%idx, 4, %idx][%c1, 1, %c1]
: tensor<8x16x4xf32> into tensor<16x32x8xf32>
// CHECK: subtensor_insert
// CHECK: tensor.insert_slice
// CHECK-SAME: tensor<4x4xf32> into tensor<8x16x4xf32>
%3 = subtensor_insert %t3 into %t[0, 2, 0][4, 1, 4][1, 1, 1]
%3 = tensor.insert_slice %t3 into %t[0, 2, 0][4, 1, 4][1, 1, 1]
: tensor<4x4xf32> into tensor<8x16x4xf32>
return

View File

@ -1214,9 +1214,9 @@ func @assume_alignment(%0: memref<4x4xf16>) {
// -----
func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) {
func @slice_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) {
// expected-error @+1 {{expected result type to be 'tensor<4x4x4xf32>' or a rank-reduced version. (mismatch of result sizes)}}
%0 = subtensor %t[0, 2, 0][4, 4, 4][1, 1, 1]
%0 = tensor.extract_slice %t[0, 2, 0][4, 4, 4][1, 1, 1]
: tensor<8x16x4xf32> to tensor<?x4x4xf32>
return
@ -1224,9 +1224,9 @@ func @subtensor_wrong_dynamic_type(%t: tensor<8x16x4xf32>, %idx : index) {
// -----
func @subtensor_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) {
func @slice_wrong_static_type(%t: tensor<8x16x4xf32>, %idx : index) {
// expected-error @+1 {{expected result type to be 'tensor<?x3x?xf32>' or a rank-reduced version. (mismatch of result sizes)}}
%0 = subtensor %t[0, 0, 0][%idx, 3, %idx][1, 1, 1]
%0 = tensor.extract_slice %t[0, 0, 0][%idx, 3, %idx][1, 1, 1]
: tensor<8x16x4xf32> to tensor<4x4x4xf32>
return

View File

@ -10,12 +10,12 @@ func @main() {
%const = constant dense<10.0> : tensor<2xf32>
%insert_val = constant dense<20.0> : tensor<1xf32>
// Both of these subtensor_insert ops insert into the same original tensor
// Both of these insert_slice ops insert into the same original tensor
// value `%const`. This can easily cause bugs if at the memref level
// we attempt to write in-place into the memref that %const has been
// converted into.
%inserted_at_position_0 = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
%inserted_at_position_1 = subtensor_insert %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32>
%inserted_at_position_0 = tensor.insert_slice %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
%inserted_at_position_1 = tensor.insert_slice %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32>
%unranked_at_position_0 = tensor.cast %inserted_at_position_0 : tensor<2xf32> to tensor<*xf32>
call @print_memref_f32(%unranked_at_position_0) : (tensor<*xf32>) -> ()
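
The in-place hazard described in the comment above is easiest to see at the buffer level. Below is a minimal sketch, not taken from this commit, of what a correct lowering has to do; buffer names and the global symbols are hypothetical, and the ops are from the present-day MemRef dialect. Each insert_slice gets its own copy of the buffer backing %const, so neither write can observe the other or mutate the constant itself.

%cst_buf = memref.get_global @__constant_2xf32 : memref<2xf32>  // buffer backing %const
%ins_buf = memref.get_global @__constant_1xf32 : memref<1xf32>  // buffer backing %insert_val
// Writing into %cst_buf directly would let the second insert observe the
// first one's write; copy into a private buffer before writing instead.
%copy0 = memref.alloc() : memref<2xf32>
memref.copy %cst_buf, %copy0 : memref<2xf32> to memref<2xf32>
%dst0 = memref.subview %copy0[0] [1] [1] : memref<2xf32> to memref<1xf32>
memref.copy %ins_buf, %dst0 : memref<1xf32> to memref<1xf32>
// ...and the same again, with a second fresh copy, for %inserted_at_position_1.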

View File

@ -9,7 +9,7 @@
func @main() {
%const = constant dense<10.0> : tensor<2xf32>
%insert_val = constant dense<20.0> : tensor<1xf32>
%inserted = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
%inserted = tensor.insert_slice %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
%unranked = tensor.cast %inserted : tensor<2xf32> to tensor<*xf32>
call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()

View File

@ -1065,9 +1065,9 @@ func @memref_cast_folding_subview_static(%V: memref<16x16xf32>, %a: index, %b: i
// -----
// CHECK-LABEL: func @subtensor
// CHECK-LABEL: func @slice
// CHECK-SAME: %[[ARG0:[0-9a-z]*]]: index, %[[ARG1:[0-9a-z]*]]: index
func @subtensor(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index)
func @slice(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index)
-> tensor<?x?x?xf32>
{
%c0 = constant 0 : index
@ -1076,18 +1076,18 @@ func @subtensor(%t: tensor<8x16x4xf32>, %arg0 : index, %arg1 : index)
%c7 = constant 7 : index
%c11 = constant 11 : index
// CHECK: subtensor %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] :
// CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [7, 11, 2] [1, 1, 1] :
// CHECK-SAME: tensor<8x16x4xf32> to tensor<7x11x2xf32>
// tensor.cast gets folded away in the consumer.
// CHECK-NOT: tensor.cast
%1 = subtensor %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
%1 = tensor.extract_slice %t[%c0, %c0, %c0] [%c7, %c11, %c2] [%c1, %c1, %c1]
: tensor<8x16x4xf32> to tensor<?x?x?xf32>
// Test: subtensor with one dynamic operand can also be folded.
// CHECK: subtensor %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] :
// Test: slice with one dynamic operand can also be folded.
// CHECK: tensor.extract_slice %{{.*}}[0, 0, 0] [2, %[[ARG0]], 2] [1, 1, 1] :
// CHECK-SAME: tensor<7x11x2xf32> to tensor<2x?x2xf32>
// CHECK: tensor.cast %{{.*}} : tensor<2x?x2xf32> to tensor<?x?x?xf32>
%2 = subtensor %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1]
%2 = tensor.extract_slice %1[%c0, %c0, %c0] [%c2, %arg0, %c2] [%c1, %c1, %c1]
: tensor<?x?x?xf32> to tensor<?x?x?xf32>
return %2 : tensor<?x?x?xf32>
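
The two folds noted in the comments above compose: constant offsets, sizes, and strides make each slice's result type static, and the tensor.cast that restores the original dynamic type folds into the consuming slice. Reconstructed from the CHECK lines (the result names are illustrative, not part of the test), the canonicalized body is expected to look like:

%s1 = tensor.extract_slice %t[0, 0, 0] [7, 11, 2] [1, 1, 1]
    : tensor<8x16x4xf32> to tensor<7x11x2xf32>
%s2 = tensor.extract_slice %s1[0, 0, 0] [2, %arg0, 2] [1, 1, 1]
    : tensor<7x11x2xf32> to tensor<2x?x2xf32>
%r = tensor.cast %s2 : tensor<2x?x2xf32> to tensor<?x?x?xf32>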

View File

@ -529,9 +529,9 @@ static void applyPadTensorToGenericPatterns(FuncOp funcOp) {
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
static void applySubTensorOfPadTensorSwapPattern(FuncOp funcOp) {
static void applyExtractSliceOfPadTensorSwapPattern(FuncOp funcOp) {
RewritePatternSet patterns(funcOp.getContext());
patterns.add<SubTensorOfPadTensorSwapPattern>(funcOp.getContext());
patterns.add<ExtractSliceOfPadTensorSwapPattern>(funcOp.getContext());
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
@ -614,7 +614,7 @@ void TestLinalgTransforms::runOnFunction() {
if (testTransformPadTensor)
return applyPadTensorToGenericPatterns(getFunction());
if (testSwapSubTensorPadTensor)
return applySubTensorOfPadTensorSwapPattern(getFunction());
return applyExtractSliceOfPadTensorSwapPattern(getFunction());
if (testAffineMinSCFCanonicalizationPatterns)
return applyAffineMinSCFCanonicalizationPatterns(getFunction());
if (testTileAndPadPattern)