[mlir] Add an interface to allow operations to specify how they can be tiled.

An interface to allow for tiling of operations is introduced. The tiling of the linalg.pad_tensor operation is modified to use this interface.

Differential Revision: https://reviews.llvm.org/D108611

This commit is contained in:
parent 3fefebabe5
commit ba72cfe734
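For orientation before the diff: a transformation that consumes the new interface queries the op's destination operands and iteration domain, then asks the op for the implementation of a single tile. The sketch below is hypothetical (the helper `tileWithInterface` and its parameters are illustrative assumptions, not part of this commit):

#include "mlir/IR/Builders.h"
#include "mlir/Interfaces/TilingInterface.h"

using namespace mlir;

// Hypothetical consumer of the TilingInterface (not part of this commit).
// A real driver would also build the loop nest described by getLoopBounds()
// and insert the tiled results into `dest`.
static Operation *tileWithInterface(OpBuilder &b, TilingInterface op,
                                    ArrayRef<OpFoldResult> tileOffsets,
                                    ArrayRef<OpFoldResult> tileSizes) {
  // Destination tensors into which the tiled results are eventually inserted.
  SmallVector<Value> dest = op.getDestinationOperands(b);
  // Full iteration space; a driver would clamp the tile to these bounds.
  SmallVector<Range> loopBounds = op.getLoopBounds(b);
  (void)loopBounds;
  // Ask the op to emit the computation for this one tile.
  return op.getTiledImplementation(b, dest, tileOffsets, tileSizes);
}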
@@ -25,6 +25,7 @@
 #include "mlir/Interfaces/CopyOpInterface.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/TilingInterface.h"
 #include "mlir/Interfaces/ViewLikeInterface.h"
 #include "mlir/Support/LLVM.h"
@@ -18,6 +18,7 @@ include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/TilingInterface.td"
 include "mlir/Interfaces/ViewLikeInterface.td"

 // Base class for Linalg dialect ops that do not correspond to library calls.
@@ -130,7 +131,10 @@ def Linalg_InitTensorOp : Linalg_Op<"init_tensor",

 def Linalg_PadTensorOp : Linalg_Op<"pad_tensor",
     [AttrSizedOperandSegments, NoSideEffect,
-     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
+     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
+     DeclareOpInterfaceMethods<TilingInterface,
+         ["getDestinationOperands", "getLoopIteratorTypes", "getLoopBounds",
+          "getTiledImplementation"]>]> {
   let summary = "tensor pad operation";
   let description = [{
     `linalg.pad_tensor` is an operation that pads the `source` tensor
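Declaring `DeclareOpInterfaceMethods<TilingInterface, ...>` as above makes mlir-tblgen inject the four method declarations into the generated PadTensorOp class. Their approximate shape (a sketch of generated code, shown here for readability only; not text from the diff) is:

SmallVector<Value> getDestinationOperands(OpBuilder &b);
SmallVector<StringRef> getLoopIteratorTypes();
SmallVector<Range> getLoopBounds(OpBuilder &b);
Operation *getTiledImplementation(OpBuilder &b, ValueRange dest,
                                  ArrayRef<OpFoldResult> offsets,
                                  ArrayRef<OpFoldResult> sizes);

The out-of-line definitions for linalg.pad_tensor appear in the LinalgOps.cpp hunk further down.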
@@ -6,6 +6,7 @@ add_mlir_interface(DerivedAttributeOpInterface)
 add_mlir_interface(InferTypeOpInterface)
 add_mlir_interface(LoopLikeInterface)
 add_mlir_interface(SideEffectInterfaces)
+add_mlir_interface(TilingInterface)
 add_mlir_interface(VectorInterfaces)
 add_mlir_interface(ViewLikeInterface)
@@ -0,0 +1,26 @@ (new file: mlir/include/mlir/Interfaces/TilingInterface.h)
+//===- TilingInterface.h - Interface for tiling operations ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the TilingInterface defined in
+// `TilingInterface.td`.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_TILINGINTERFACE_H_
+#define MLIR_INTERFACES_TILINGINTERFACE_H_
+
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
+#include "mlir/Support/LLVM.h"
+
+/// Include the ODS generated interface header files.
+#include "mlir/Interfaces/TilingInterface.h.inc"
+
+#endif // MLIR_INTERFACES_TILINGINTERFACE_H_
@@ -0,0 +1,95 @@ (new file: mlir/include/mlir/Interfaces/TilingInterface.td)
+//===- TilingInterface.td - Interface for tiling operations *- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an interface to allow operations to generate a tiled
+// implementation of themselves.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TILINGINTERFACE
+#define MLIR_TILINGINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def TilingInterface : OpInterface<"TilingInterface"> {
+  let description = [{
+    Interface for allowing operations to expose information needed to
+    tile them (similar to LinalgOp, but without having access to
+    indexing maps)
+  }];
+  let cppNamespace = "::mlir";
+  let methods = [
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of operands into which the result of the
+          tiled implementation is written. With `tensor`
+          operands, this is the initial tensor into which
+          the tiled results are inserted. With `memref` operands,
+          this is the operand into which the result of the tiled
+          operation is written.
+        }],
+        /*retType=*/"SmallVector<Value>",
+        /*methodName=*/"getDestinationOperands",
+        /*args=*/(ins "OpBuilder &":$b),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/"return ValueRange{};"
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of `StringRef`s that describe the number of
+          loops and the iterator types of the operation. The list is
+          expected to use
+          `getParallelIteratorTypeName()`/`getReductionIteratorTypeName()`
+          from MLIR Structured Op Utils.
+        }],
+        /*retType=*/"SmallVector<StringRef>",
+        /*methodName=*/"getLoopIteratorTypes"
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Returns a list of ranges that describe the loop bounds and
+          step for the loops of the operation.
+        }],
+        /*retType=*/"SmallVector<Range>",
+        /*methodName=*/"getLoopBounds",
+        /*args=*/(ins "OpBuilder &":$b)
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Method to generate the tiled implementation of an operation.
+
+          The iteration space of the operation is returned by
+          `getLoopBounds`. The caller provides the information of the
+          tile within this iteration space whose implementation the
+          caller needs.
+          - `dest` are the Values into which the result of the tiled
+            operation is to be inserted. The types of the `dest`
+            Values are the same as the types returned by the
+            `getDestinationOperands` method.
+          - `offsets` provides the offset of the tile within the
+            iteration space.
+          - `sizes` provides the size of the tile.
+
+          The method returns the operation that is the tiled
+          implementation.
+        }],
+        /*retType=*/"Operation *",
+        /*methodName=*/"getTiledImplementation",
+        /*args=*/(ins
+            "OpBuilder &":$b,
+            "ValueRange ":$dest,
+            "ArrayRef<OpFoldResult> ":$offsets,
+            "ArrayRef<OpFoldResult> ":$sizes),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          return nullptr;
+        }]
+      >
+  ];
+}
+#endif // MLIR_TILINGINTERFACE
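Note that both `getDestinationOperands` and `getTiledImplementation` carry default implementations (`ValueRange{}` and `nullptr`), so a caller must tolerate ops that implement the interface but decline to tile; PadTensorOp, for instance, returns nullptr when the padding value is not a constant. A defensive-use sketch (hypothetical `tryTile` helper, assuming the includes from the earlier sketch; not part of this commit):

// Sketch: tile `op` only when it opts in and actually produces a tiled body.
static LogicalResult tryTile(OpBuilder &b, Operation *op, ValueRange dest,
                             ArrayRef<OpFoldResult> offsets,
                             ArrayRef<OpFoldResult> sizes) {
  auto tilingOp = dyn_cast<TilingInterface>(op);
  if (!tilingOp)
    return failure();
  // May be nullptr, e.g. for a PadTensorOp with a non-constant padding value.
  Operation *tiled = tilingOp.getTiledImplementation(b, dest, offsets, sizes);
  return tiled ? success() : failure();
}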
@@ -18,9 +18,11 @@ add_mlir_dialect_library(MLIRLinalg
   MLIRIR
   MLIRParser
   MLIRSideEffectInterfaces
-  MLIRViewLikeInterface
+  MLIRSCF
   MLIRStandard
   MLIRMath
   MLIRMemRef
   MLIRTensor
+  MLIRTilingInterface
+  MLIRViewLikeInterface
 )
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
@@ -1203,6 +1204,253 @@ LogicalResult PadTensorOp::reifyResultShapes(
   return success();
 }
+
+//===----------------------------------------------------------------------===//
+// Methods related to PadTensor tiling.
+//===----------------------------------------------------------------------===//
+
+/// Given an OpFoldResult, return a Value. If the OpFoldResult is an Attribute,
+/// it must be of type Integer.
+static Value getAsValue(OpBuilder &builder, Location loc, OpFoldResult ofr) {
+  if (auto val = ofr.dyn_cast<Value>())
+    return val;
+  auto intVal = getConstantIntValue(ofr);
+  assert(intVal && "expected Value or IntegerAttr");
+  return builder.create<ConstantIndexOp>(loc, *intVal);
+}
+
+SmallVector<Value> PadTensorOp::getDestinationOperands(OpBuilder &b) {
+  ReifiedRankedShapedTypeDims reifiedShapes;
+  (void)reifyResultShapes(b, reifiedShapes);
+  Value initTensor = b.create<InitTensorOp>(getLoc(), reifiedShapes[0],
+                                            getResultType().getElementType());
+  return {initTensor};
+}
+
+SmallVector<StringRef> PadTensorOp::getLoopIteratorTypes() {
+  SmallVector<StringRef> iteratorTypes(getResultType().getRank(),
+                                       getParallelIteratorTypeName());
+  return iteratorTypes;
+}
+
+SmallVector<Range> PadTensorOp::getLoopBounds(OpBuilder &b) {
+  ReifiedRankedShapedTypeDims reifiedShapes;
+  (void)reifyResultShapes(b, reifiedShapes);
+  Value zero = b.create<ConstantIndexOp>(getLoc(), 0);
+  Value one = b.create<ConstantIndexOp>(getLoc(), 1);
+  // Initialize all the ranges to {zero, one, one}. All the `ub`s are
+  // overwritten.
+  SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
+  for (auto ub : enumerate(reifiedShapes[0]))
+    loopRanges[ub.index()].size = ub.value();
+  return loopRanges;
+}
+
+Operation *PadTensorOp::getTiledImplementation(OpBuilder &b, ValueRange dest,
+                                               ArrayRef<OpFoldResult> offsets,
+                                               ArrayRef<OpFoldResult> sizes) {
+  // Only constant padding value supported.
+  Value padValue = getConstantPaddingValue();
+  if (!padValue)
+    return nullptr;
+
+  // Helper variables and functions for various arithmetic operations. These are
+  // used extensively for computing new offset/length and padding values.
+  Location loc = getLoc();
+  AffineExpr dim0, dim1;
+  bindDims(b.getContext(), dim0, dim1);
+  // Add two integers.
+  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
+  auto add = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
+  };
+  // Subtract two integers.
+  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
+  auto sub = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
+  };
+  // Take the minimum of two integers.
+  auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
+  auto min = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
+  };
+  // Take the maximum of two integers.
+  auto max = [&](Value v1, Value v2) {
+    return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
+  };
+  // Zero index-typed integer.
+  auto zero = b.create<ConstantIndexOp>(loc, 0);
+
+  // Helper function for filling static/dynamic low/high padding indices vectors
+  // of PadTensorOp.
+  auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
+                         SmallVector<int64_t> &staticIndices) {
+    if (auto constInt = getConstantIntValue(val)) {
+      staticIndices.push_back(*constInt);
+    } else {
+      staticIndices.push_back(ShapedType::kDynamicSize);
+      dynIndices.push_back(val);
+    }
+  };
+
+  // Compute new offsets, lengths, low padding, high padding.
+  SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
+  SmallVector<Value> newLows, newHighs;
+  SmallVector<int64_t> staticNewLows, staticNewHighs;
+  // Set to true if the original data source is not read at all.
+  bool hasZeroLen = false;
+  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
+  // is true if the original data source turns out to be unused at runtime.
+  Value dynHasZeroLenCond;
+
+  int64_t rank = getSourceType().getRank();
+  for (unsigned dim = 0; dim < rank; ++dim) {
+    auto low = getAsValue(b, loc, getMixedLowPad()[dim]);
+    bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
+    auto high = getAsValue(b, loc, getMixedHighPad()[dim]);
+    bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
+    auto offset = getAsValue(b, loc, offsets[dim]);
+    auto length = getAsValue(b, loc, sizes[dim]);
+    auto srcSize = b.createOrFold<tensor::DimOp>(loc, source(), dim);
+
+    // The new amount of low padding is `low - offset`, except for the case
+    // where none of the low padding is read. In that case, the new amount of
+    // low padding is zero.
+    //
+    // Optimization: If low = 0, then newLow = 0.
+    Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+    appendIndex(newLow, newLows, staticNewLows);
+
+    // Start reading the data from position `offset - low`. Since the original
+    // read may have started in the low padding zone, this value could be
+    // negative. Therefore, start reading from:
+    //
+    //   max(offset - low, 0)
+    //
+    // The original read could also have started in the high padding zone.
+    // In that case, set the offset to the end of the source tensor. The new
+    // ExtractSliceOp length will be zero in that case. (Effectively reading no
+    // data from the source.)
+    //
+    // Optimization: If low = 0, then the formula can be simplified.
+    Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
+                                : min(offset, srcSize);
+    newOffsets.push_back(getAsOpFoldResult(newOffset));
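+    // Worked example (hypothetical numbers, added for exposition; not part
+    // of the commit): with low = 2, srcSize = 10 and a tile at offset = 1,
+    // newLow = max(0, 2 - 1) = 1 and newOffset = min(max(1 - 2, 0), 10) = 0,
+    // i.e. the tile begins with one padded element and then reads from the
+    // start of the source.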
+
+    // The original ExtractSliceOp was reading until position `offset + length`.
+    // Therefore, the corresponding position within the source tensor is:
+    //
+    //   offset + length - low
+    //
+    // In case the original ExtractSliceOp stopped reading within the low
+    // padding zone, this value can be negative. In that case, the end position
+    // of the read should be zero. (Similar to newOffset.)
+    //
+    // The original read could also have stopped in the high padding zone.
+    // In that case, the end position of the read should be the end of the
+    // source tensor. (Similar to newOffset.)
+    //
+    //   endLoc = min(max(offset - low + length, 0), srcSize)
+    //
+    // The new ExtractSliceOp length is `endLoc - newOffset`.
+    //
+    // Optimization: If low = 0, then the formula can be simplified.
+    Value endLoc = hasLowPad
+                       ? min(max(add(sub(offset, low), length), zero), srcSize)
+                       : min(add(offset, length), srcSize);
+    Value newLength = sub(endLoc, newOffset);
+    newLengths.push_back(getAsOpFoldResult(newLength));
+
+    // Check if newLength is zero. In that case, no SubTensorOp should be
+    // executed.
+    if (auto newLengthInt = getConstantIntValue(newLength)) {
+      hasZeroLen |= *newLengthInt == 0;
+    } else {
+      Value check = b.create<CmpIOp>(loc, CmpIPredicate::eq, newLength, zero);
+      dynHasZeroLenCond = dynHasZeroLenCond
+                              ? b.create<OrOp>(loc, check, dynHasZeroLenCond)
+                              : check;
+    }
+
+    // The amount of high padding is simply the number of elements remaining,
+    // so that the result has the same length as the original ExtractSliceOp.
+    // As an optimization, if the original high padding is zero, then the new
+    // high padding must also be zero.
+    Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
+    appendIndex(newHigh, newHighs, staticNewHighs);
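+    // Worked example, continued (hypothetical numbers; not part of the
+    // commit): with length = 4, endLoc = min(max(1 - 2 + 4, 0), 10) = 3 and
+    // newLength = 3 - 0 = 3, so newHigh = (4 - 3) - 1 = 0: the four result
+    // elements are one padded element followed by three source elements.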
+
+    // Only unit stride supported.
+    newStrides.push_back(b.getIndexAttr(1));
+  }
+
+  // The shape of the result can be obtained from the sizes passed in.
+  SmallVector<Value> dynDims;
+  SmallVector<int64_t> shape;
+  dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize);
+  RankedTensorType resultType =
+      RankedTensorType::get(shape, getResultType().getElementType());
+
+  // Insert cast to ensure that types match. (May be folded away.)
+  auto castResult = [&](Value val) -> Operation * {
+    auto castOp = b.create<tensor::CastOp>(loc, resultType, val);
+    return castOp;
+  };
+
+  // In cases where the original data source is unused: Emit a GenerateOp and
+  // do not generate a SliceOp. (The result shape of the SliceOp would
+  // have a dimension of size 0, the semantics of which is unclear.)
+  auto createGenerateOp = [&]() {
+    // Create GenerateOp.
+    auto generateOp = b.create<tensor::GenerateOp>(
+        loc, resultType, dynDims,
+        [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
+          builder.create<tensor::YieldOp>(gLoc, padValue);
+        });
+    return castResult(generateOp);
+  };
+
+  // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
+  // the result shape of the new SliceOp has a zero dimension.
+  auto createPadTensorOfSubTensor = [&]() {
+    // Create pad_tensor(subtensor(x)).
+    auto newSliceOp = b.create<tensor::ExtractSliceOp>(
+        loc, source(), newOffsets, newLengths, newStrides);
+    auto newPadTensorOp = b.create<PadTensorOp>(
+        loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
+
+    // Copy region to new PadTensorOp.
+    BlockAndValueMapping bvm;
+    region().cloneInto(&newPadTensorOp.getRegion(), bvm);
+
+    // Cast result and return.
+    return castResult(newPadTensorOp);
+  };
+
+  // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically
+  // known that the original data source x is not used.
+  if (hasZeroLen) {
+    return createGenerateOp();
+  }
+
+  // If there are dynamic dimensions: Generate an scf.if check to avoid creating
+  // SliceOps with result dimensions of size 0 at runtime.
+  if (dynHasZeroLenCond) {
+    auto result = b.create<scf::IfOp>(
+        loc, resultType, dynHasZeroLenCond,
+        /*thenBuilder=*/
+        [&](OpBuilder &b, Location loc) {
+          b.create<scf::YieldOp>(loc, createGenerateOp()->getResult(0));
+        },
+        /*elseBuilder=*/
+        [&](OpBuilder &b, Location loc) {
+          b.create<scf::YieldOp>(loc,
+                                 createPadTensorOfSubTensor()->getResult(0));
+        });
+    return result;
+  }
+  return createPadTensorOfSubTensor();
+}
+
 namespace {
 // Folds linalg.pad_tensor when padding is static zeros.
 struct FoldStaticZeroPadding : public OpRewritePattern<PadTensorOp> {
@@ -373,11 +373,12 @@ static LogicalResult tilePadTensorOp(OpBuilder &builder, PadTensorOp op,
       options.tileSizeComputationFunction(builder, op);
   assert(static_cast<int64_t>(tileSizes.size()) == rank);
   // Compute lower and upper bounds of the loop nest.
+  SmallVector<Range> ranges = op.getLoopBounds(builder);
   SmallVector<Value> lbs, dims, steps;
   for (int64_t i = 0; i < rank; ++i) {
     if (!isZero(tileSizes[i])) {
-      lbs.push_back(builder.create<ConstantIndexOp>(loc, 0));
-      dims.push_back(builder.create<tensor::DimOp>(loc, op.output(), i));
+      lbs.push_back(ranges[i].offset);
+      dims.push_back(ranges[i].size);
       steps.push_back(tileSizes[i]);
     }
   }
@@ -648,16 +648,6 @@ GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp,
   return success();
 }
-
-/// Given an OpFoldResult, return a Value. If the OpFoldResult is an Attribute,
-/// it must be of type Integer.
-static Value asValue(OpBuilder &builder, Location loc, OpFoldResult ofr) {
-  if (auto val = ofr.dyn_cast<Value>())
-    return val;
-  auto intVal = getConstantIntValue(ofr);
-  assert(intVal && "expected Value or IntegerAttr");
-  return builder.create<ConstantIndexOp>(loc, *intVal);
-}

 LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
     tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
   auto padOp = sliceOp.source().getDefiningOp<PadTensorOp>();
@@ -666,227 +656,12 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
   // Only unit stride supported.
   if (!sliceOp.hasUnitStride())
     return failure();
-  // Only constant padding value supported.
-  Value padValue = padOp.getConstantPaddingValue();
-  if (!padValue)
-    return failure();
-
-  // Helper variables and functions for various arithmetic operations. These are
-  // used extensively for computing new offset/length and padding values.
-  Location loc = sliceOp.getLoc();
-  AffineExpr dim0, dim1;
-  bindDims(rewriter.getContext(), dim0, dim1);
-  // Add two integers.
-  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
-  auto add = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineApplyOp>(loc, addMap,
-                                                ValueRange{v1, v2});
-  };
-  // Subtract two integers.
-  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
-  auto sub = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineApplyOp>(loc, subMap,
-                                                ValueRange{v1, v2});
-  };
-  // Take the minimum of two integers.
-  auto idMap = AffineMap::getMultiDimIdentityMap(2, rewriter.getContext());
-  auto min = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
-  };
-  // Take the maximum of two integers.
-  auto max = [&](Value v1, Value v2) {
-    return rewriter.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
-  };
-  // Zero index-typed integer.
-  auto zero = rewriter.create<ConstantIndexOp>(loc, 0);
-
-  // Helper function for filling static/dynamic low/high padding indices vectors
-  // of PadTensorOp.
-  auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
-                         SmallVector<int64_t> &staticIndices) {
-    if (auto constInt = getConstantIntValue(val)) {
-      staticIndices.push_back(*constInt);
-    } else {
-      staticIndices.push_back(ShapedType::kDynamicSize);
-      dynIndices.push_back(val);
-    }
-  };
-
-  // Compute new offsets, lengths, low padding, high padding.
-  SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
-  SmallVector<Value> newLows, newHighs;
-  SmallVector<int64_t> staticNewLows, staticNewHighs;
-  // Set to true if the original data source is not read at all.
-  bool hasZeroLen = false;
-  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
-  // is true if the original data source turns out to be unused at runtime.
-  Value dynHasZeroLenCond;
-
-  int64_t rank = padOp.getSourceType().getRank();
-  for (unsigned dim = 0; dim < rank; ++dim) {
-    auto low = asValue(rewriter, loc, padOp.getMixedLowPad()[dim]);
-    bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
-    auto high = asValue(rewriter, loc, padOp.getMixedHighPad()[dim]);
-    bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
-    auto offset = asValue(rewriter, loc, sliceOp.getMixedOffsets()[dim]);
-    auto length = asValue(rewriter, loc, sliceOp.getMixedSizes()[dim]);
-    auto srcSize =
-        rewriter.createOrFold<tensor::DimOp>(loc, padOp.source(), dim);
-
-    // The new amount of low padding is `low - offset`, except for the case
-    // where none of the low padding is read. In that case, the new amount of
-    // low padding is zero.
-    //
-    // Optimization: If low = 0, then newLow = 0.
-    Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
-    appendIndex(newLow, newLows, staticNewLows);
-
-    // Start reading the data from position `offset - low`. Since the original
-    // read may have started in the low padding zone, this value could be
-    // negative. Therefore, start reading from:
-    //
-    //   max(offset - low, 0)
-    //
-    // The original read could also have started in the high padding zone.
-    // In that case, set the offset to the end of the source tensor. The new
-    // ExtractSliceOp length will be zero in that case. (Effectively reading no
-    // data from the source.)
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
-                                : min(offset, srcSize);
-    newOffsets.push_back(getAsOpFoldResult(newOffset));
-
-    // The original ExtractSliceOp was reading until position `offset + length`.
-    // Therefore, the corresponding position within the source tensor is:
-    //
-    //   offset + length - low
-    //
-    // In case the original ExtractSliceOp stopped reading within the low
-    // padding zone, this value can be negative. In that case, the end position
-    // of the read should be zero. (Similar to newOffset.)
-    //
-    // The original read could also have stopped in the high padding zone.
-    // In that case, the end position of the read should be the end of the
-    // source tensor. (Similar to newOffset.)
-    //
-    //   endLoc = min(max(offset - low + length, 0), srcSize)
-    //
-    // The new ExtractSliceOp length is `endLoc - newOffset`.
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    Value endLoc = hasLowPad
-                       ? min(max(add(sub(offset, low), length), zero), srcSize)
-                       : min(add(offset, length), srcSize);
-    Value newLength = sub(endLoc, newOffset);
-    newLengths.push_back(getAsOpFoldResult(newLength));
-
-    // Check if newLength is zero. In that case, no SubTensorOp should be
-    // executed.
-    if (auto newLengthInt = getConstantIntValue(newLength)) {
-      hasZeroLen |= *newLengthInt == 0;
-    } else {
-      Value check = rewriter.create<CmpIOp>(
-          loc, CmpIPredicate::eq, newLength, zero);
-      dynHasZeroLenCond =
-          dynHasZeroLenCond
-              ? rewriter.create<OrOp>(loc, check, dynHasZeroLenCond)
-              : check;
-    }
-
-    // The amount of high padding is simply the number of elements remaining,
-    // so that the result has the same length as the original ExtractSliceOp.
-    // As an optimization, if the original high padding is zero, then the new
-    // high padding must also be zero.
-    Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
-    appendIndex(newHigh, newHighs, staticNewHighs);
-
-    // Only unit stride supported.
-    newStrides.push_back(rewriter.getIndexAttr(1));
-  }
-
-  // Insert cast to ensure that types match. (May be folded away.)
-  auto castResult = [&](Value val) -> Value {
-    auto castOp = rewriter.create<tensor::CastOp>(loc, sliceOp.getType(), val);
-    return castOp;
-  };
-
-  // In cases where the original data source is unused: Emit a GenerateOp and
-  // do not generate a SliceOp. (The result shape of the SliceOp would
-  // have a dimension of size 0, the semantics of which is unclear.)
-  auto createGenerateOp = [&]() {
-    // The shape of the GenerateOp is the same as the existing SliceOp.
-    RankedTensorType type = sliceOp.getType();
-    SmallVector<Value> dynDims;
-    for (unsigned i = 0; i < type.getRank(); ++i) {
-      if (type.isDynamicDim(i))
-        dynDims.push_back(asValue(rewriter, loc, sliceOp.getMixedSizes()[i]));
-    }
-
-    // Create GenerateOp.
-    auto generateOp = rewriter.create<tensor::GenerateOp>(loc, type, dynDims);
-
-    // Copy region to new op.
-    BlockAndValueMapping bvm;
-    padOp.region().cloneInto(&generateOp.getRegion(), bvm);
-    // Rewrite linalg::YieldOp to tensor::YieldOp.
-    {
-      OpBuilder::InsertionGuard guard(rewriter);
-      auto yieldOp = dyn_cast<linalg::YieldOp>(
-          generateOp.getRegion().front().getTerminator());
-      assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator");
-      assert(yieldOp.values().size() == 1);
-      rewriter.setInsertionPoint(yieldOp);
-      rewriter.replaceOpWithNewOp<tensor::YieldOp>(yieldOp,
-                                                   yieldOp.values()[0]);
-    }
-
-    return castResult(generateOp);
-  };
-
-  // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
-  // the result shape of the new SliceOp has a zero dimension.
-  auto createPadTensorOfSubTensor = [&]() {
-    // Create pad_tensor(subtensor(x)).
-    auto newSliceOp = rewriter.create<tensor::ExtractSliceOp>(
-        loc, padOp.source(), newOffsets, newLengths, newStrides);
-    auto newPadTensorOp = rewriter.create<PadTensorOp>(
-        loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
-
-    // Copy region to new PadTensorOp.
-    BlockAndValueMapping bvm;
-    padOp.region().cloneInto(&newPadTensorOp.getRegion(), bvm);
-
-    // Cast result and return.
-    return castResult(newPadTensorOp);
-  };
-
-  // Rewrite subtensor(pad_tensor(x)) into a GenerateOp if it is statically
-  // known that the original data source x is not used.
-  if (hasZeroLen) {
-    rewriter.replaceOp(sliceOp, createGenerateOp());
-    return success();
-  }
-
-  // If there are dynamic dimensions: Generate an scf.if check to avoid creating
-  // SliceOps with result dimensions of size 0 at runtime.
-  if (dynHasZeroLenCond) {
-    auto result = rewriter.create<scf::IfOp>(
-        loc, sliceOp.getType(), dynHasZeroLenCond,
-        /*thenBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          b.create<scf::YieldOp>(loc, createGenerateOp());
-        },
-        /*elseBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          b.create<scf::YieldOp>(loc, createPadTensorOfSubTensor());
-        });
-    rewriter.replaceOp(sliceOp, result.getResult(0));
-    return success();
-  }
-
-  rewriter.replaceOp(sliceOp, createPadTensorOfSubTensor());
+
+  Operation *tiledPadOp = padOp.getTiledImplementation(
+      rewriter, /*dest=*/ValueRange{}, sliceOp.getMixedOffsets(),
+      sliceOp.getMixedSizes());
+  // All shapes are static and the data source is actually used. Rewrite into
+  // pad_tensor(subtensor(x)).
+  rewriter.replaceOp(sliceOp, tiledPadOp->getResults());
   return success();
 }
@@ -90,6 +90,4 @@ bool isEqualConstantIntOrValue(OpFoldResult ofr1, OpFoldResult ofr2) {
   auto v1 = ofr1.dyn_cast<Value>(), v2 = ofr2.dyn_cast<Value>();
   return v1 && v1 == v2;
 }
-
-} // namespace mlir

@@ -8,6 +8,7 @@ set(LLVM_OPTIONAL_SOURCES
   InferTypeOpInterface.cpp
   LoopLikeInterface.cpp
   SideEffectInterfaces.cpp
+  TilingInterface.cpp
   VectorInterfaces.cpp
   ViewLikeInterface.cpp
 )

@@ -37,6 +38,6 @@ add_mlir_interface_library(DerivedAttributeOpInterface)
 add_mlir_interface_library(InferTypeOpInterface)
 add_mlir_interface_library(LoopLikeInterface)
 add_mlir_interface_library(SideEffectInterfaces)
+add_mlir_interface_library(TilingInterface)
 add_mlir_interface_library(VectorInterfaces)
 add_mlir_interface_library(ViewLikeInterface)
-
@@ -0,0 +1,18 @@ (new file: mlir/lib/Interfaces/TilingInterface.cpp)
+//===- TilingInterface.cpp - Tiling interface -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the interface in `TilingInterface.td`.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/TilingInterface.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+
+using namespace mlir;
+
+#include "mlir/Interfaces/TilingInterface.cpp.inc"
@@ -3,14 +3,18 @@
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,3" -cse -split-input-file | \
 // RUN:   FileCheck %s -check-prefix=TILE1

-// TILE2-LABEL: func @dynamic_pad_tensor(
+// TILE2-DAG:  #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// TILE2-DAG:  #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// TILE2:      func @dynamic_pad_tensor(
 // TILE2-SAME:   %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
 // TILE2-DAG:    %[[C0:.*]] = constant 0 : index
 // TILE2-DAG:    %[[C1:.*]] = constant 1 : index
 // TILE2-DAG:    %[[C2:.*]] = constant 2 : index
 // TILE2-DAG:    %[[C3:.*]] = constant 3 : index
-// TILE2:        %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
-// TILE2:        %[[DIM1:.*]] = tensor.dim %[[OUT]], %[[C1]]
+// TILE2:        %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
+// TILE2:        %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]]
+// TILE2:        %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// TILE2:        %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
 // TILE2:        %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
 // TILE2:          scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
 // TILE2:            %[[SWAP_RESULT:.*]] = scf.if

@@ -21,12 +25,14 @@
 // TILE2:            tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 // TILE2:        return %[[RESULT]]

-// TILE1-LABEL: func @dynamic_pad_tensor(
+// TILE1-DAG:  #[[MAP:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// TILE1:      func @dynamic_pad_tensor(
 // TILE1-SAME:   %[[IN:.*]]: tensor<?x?xf32>, %[[OUT:.*]]: tensor<?x?xf32>
 // TILE1-DAG:    %[[C0:.*]] = constant 0 : index
 // TILE1-DAG:    %[[C1:.*]] = constant 1 : index
 // TILE1-DAG:    %[[C3:.*]] = constant 3 : index
-// TILE1:        %[[DIM1:.*]] = tensor.dim %[[OUT]], %[[C1]]
+// TILE1:        %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// TILE1:        %[[DIM1:.*]] = affine.apply #[[MAP]]()[%[[DIM_IN1]]]
 // TILE1:        %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
 // TILE1:        %[[DIM0:.*]] = tensor.dim %[[OUT]], %[[C0]]
 // TILE1:        %[[SWAP_RESULT:.*]] = scf.if
@@ -757,6 +757,13 @@ alias(
     actual = ":SideEffectInterfacesTdFiles",
 )

+td_library(
+    name = "TilingInterfaceTdFiles",
+    srcs = ["include/mlir/Interfaces/TilingInterface.td"],
+    includes = ["include"],
+    deps = [":OpBaseTdFiles"],
+)
+
 td_library(
     name = "VectorInterfacesTdFiles",
     srcs = ["include/mlir/Interfaces/VectorInterfaces.td"],

@@ -4603,6 +4610,24 @@ alias(
     actual = "SideEffectInterfaces",
 )

+gentbl_cc_library(
+    name = "TilingInterfaceIncGen",
+    strip_include_prefix = "include",
+    tbl_outs = [
+        (
+            ["-gen-op-interface-decls"],
+            "include/mlir/Interfaces/TilingInterface.h.inc",
+        ),
+        (
+            ["-gen-op-interface-defs"],
+            "include/mlir/Interfaces/TilingInterface.cpp.inc",
+        ),
+    ],
+    tblgen = ":mlir-tblgen",
+    td_file = "include/mlir/Interfaces/TilingInterface.td",
+    deps = [":TilingInterfaceTdFiles"],
+)
+
 cc_library(
     name = "Analysis",
     srcs = glob(

@@ -5790,6 +5815,7 @@ td_library(
         ":LoopLikeInterfaceTdFiles",
         ":OpBaseTdFiles",
         ":SideEffectInterfacesTdFiles",
+        ":TilingInterfaceTdFiles",
         ":ViewLikeInterfaceTdFiles",
     ],
 )

@@ -6045,10 +6071,12 @@ cc_library(
         ":MathDialect",
         ":MemRefDialect",
        ":Parser",
+        ":SCFDialect",
         ":SideEffectInterfaces",
         ":StandardOps",
         ":Support",
         ":TensorDialect",
+        ":TilingInterface",
         ":ViewLikeInterface",
         "//llvm:Support",
     ],

@@ -6129,6 +6157,21 @@ cc_library(
     ],
 )

+cc_library(
+    name = "TilingInterface",
+    srcs = ["lib/Interfaces/TilingInterface.cpp"],
+    hdrs = ["include/mlir/Interfaces/TilingInterface.h"],
+    includes = ["include"],
+    deps = [
+        ":IR",
+        ":Support",
+        ":TensorDialect",
+        ":TilingInterfaceIncGen",
+        ":ViewLikeInterface",
+        "//llvm:Support",
+    ],
+)
+
 td_library(
     name = "VectorOpsTdFiles",
     srcs = ["include/mlir/Dialect/Vector/VectorOps.td"],