[mlir][linalg] Vectorize linalg.pad_tensor source copying (improved)

Vectorize the copying of a linalg.pad_tensor op's source if, for each dimension, the size is statically known in the source shape or in the result shape (or both).

Differential Revision: https://reviews.llvm.org/D103791
parent 4c2f3d810b
commit 01e3b34469
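For illustration, the rewrite now handled by this pattern looks as follows (a sketch adapted from the pad_static test updated below; the value names and explicit constants are chosen for readability, not verbatim pattern output):

  %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {
  ^bb0(%i: index, %j: index, %k: index):
    linalg.yield %pad_value : f32
  } : tensor<2x?x2xf32> to tensor<2x3x4xf32>

is vectorized into, roughly:

  %c0 = constant 0 : index
  %c2 = constant 2 : index
  %init = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
  %vec = vector.broadcast %pad_value : f32 to vector<2x3x4xf32>
  %fill = vector.transfer_write %vec, %init[%c0, %c0, %c0]
      : vector<2x3x4xf32>, tensor<2x3x4xf32>
  %read = vector.transfer_read %arg0[%c0, %c0, %c0], %pad_value
      {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
  %result = vector.transfer_write %read, %fill[%c0, %c0, %c2]
      {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>

The source is read with %pad_value as padding (the read along the dynamic dimension may run out-of-bounds), and the result vector is written into the filled tensor at the low-pad offset.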
@@ -719,15 +719,9 @@ struct GenericPadTensorOpVectorizationPattern

     auto sourceType = padOp.getSourceType();

-    // Copy of source with static shape can be vectorized.
-    if (sourceType.hasStaticShape()) {
-      auto vecType = VectorType::get(sourceType.getShape(),
-                                     sourceType.getElementType());
-      vectorizeStaticShapeSource(rewriter, padOp, fill, vecType);
+    // Try vectorizing the copy of source.
+    if (tryVectorizeCopy(rewriter, padOp, padValue, fill).succeeded())
       return success();
-    }
-
-    // TODO: Vectorize dynamic source but static destination.

     // Neither source type nor PadTensorOp result type have static shape. Such
     // PadTensorOps cannot be vectorized. Generate a SubTensorInsertOp instead.
@@ -751,23 +745,57 @@ struct GenericPadTensorOpVectorizationPattern
     return success();
   }

-  /// Vectorize the copying of a PadTensorOp's source that has static shape.
-  void vectorizeStaticShapeSource(PatternRewriter &rewriter, PadTensorOp padOp,
-                                  Value dest, VectorType vecType) const {
+  /// Vectorize the copying of a PadTensorOp's source. This is possible if each
+  /// dimension size is statically known in the source type or the result type
+  /// (or both).
+  LogicalResult tryVectorizeCopy(PatternRewriter &rewriter, PadTensorOp padOp,
+                                 Value padValue, Value dest) const {
+    auto sourceType = padOp.getSourceType();
+    auto resultType = padOp.getResultType();
+
+    SmallVector<int64_t> vecShape;
+    SmallVector<bool> readInBounds;
+    SmallVector<bool> writeInBounds;
+    for (unsigned i = 0; i < sourceType.getRank(); ++i) {
+      if (!sourceType.isDynamicDim(i)) {
+        vecShape.push_back(sourceType.getDimSize(i));
+        // Source shape is statically known: Neither read nor write are out-of-
+        // bounds.
+        readInBounds.push_back(true);
+        writeInBounds.push_back(true);
+      } else if (!resultType.isDynamicDim(i)) {
+        // Source shape is not statically known, but result shape is. Vectorize
+        // with size of result shape. This may be larger than the source size.
+        vecShape.push_back(resultType.getDimSize(i));
+        // Read may be out-of-bounds because the result size could be larger
+        // than the source size.
+        readInBounds.push_back(false);
+        // Write is out-of-bounds if low padding > 0.
+        writeInBounds.push_back(
+            isEqualConstantIntOrValue(padOp.getMixedLowPad()[i],
+                                      rewriter.getIndexAttr(0)));
+      } else {
+        // Neither source nor result dim of padOp is static. Cannot vectorize
+        // the copy.
+        return failure();
+      }
+    }
+    auto vecType = VectorType::get(vecShape, sourceType.getElementType());
+
     // Generate TransferReadOp.
     SmallVector<Value> readIndices(
         vecType.getRank(), rewriter.create<ConstantIndexOp>(padOp.getLoc(), 0));
     auto read = rewriter.create<vector::TransferReadOp>(
-        padOp.getLoc(), vecType, padOp.source(), readIndices);
+        padOp.getLoc(), vecType, padOp.source(), readIndices, padValue,
+        readInBounds);

-    // Generate TransferWriteOp. The destination dimensions may be dynamic, but
-    // the write cannot be out-of-bounds. (A large enough destination tensor is
-    // allocated in this pattern.)
+    // Generate TransferWriteOp.
     auto writeIndices = ofrToIndexValues(
         rewriter, padOp.getLoc(), padOp.getMixedLowPad());
-    SmallVector<bool> inBounds(vecType.getRank(), true);
     rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
-        padOp, read, dest, writeIndices, inBounds);
+        padOp, read, dest, writeIndices, writeInBounds);
+
+    return success();
   }
 };
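To make the in-bounds bookkeeping concrete, here is the per-dimension reasoning for the pad_static test updated below (source tensor<2x?x2xf32>, result tensor<2x3x4xf32>, low pad [0, 0, 2]):

  // dim 0: source size 2 is static      -> vecShape[0] = 2; read and write
  //                                        are in-bounds.
  // dim 1: source size is dynamic,      -> vecShape[1] = 3; read may be
  //        result size 3 is static         out-of-bounds (padded with the
  //                                        pad value); write is in-bounds
  //                                        because the low pad is 0.
  // dim 2: source size 2 is static      -> vecShape[2] = 2; read and write
  //                                        are in-bounds.
  //
  // Result: vecType = vector<2x3x2xf32>,
  //         readInBounds  = [true, false, true],
  //         writeInBounds = [true, true, true].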
@@ -515,12 +515,13 @@ func @matmul_i8_i8_i32(%a: memref<4x6xi8>, %b: memref<6x12xi8>, %c: memref<4x12x
 // CHECK-LABEL: func @pad_static(
 // CHECK-SAME:  %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32
 // CHECK-NOT:   linalg.pad_tensor
-// CHECK-DAG:   %[[C1:.*]] = constant 1 : index
+// CHECK-DAG:   %[[C0:.*]] = constant 0 : index
+// CHECK-DAG:   %[[C2:.*]] = constant 2 : index
 // CHECK-DAG:   %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
 // CHECK-DAG:   %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32>
 // CHECK:       %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32>
-// CHECK-DAG:   %[[DIM1:.*]] = memref.dim %[[ARG0]], %[[C1]]
-// CHECK:       %[[RESULT:.*]] = subtensor_insert %[[ARG0]] into %2[0, 0, 2] [2, %[[DIM1]], 2] [1, 1, 1] : tensor<2x?x2xf32> into tensor<2x3x4xf32>
+// CHECK:       %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32>
+// CHECK:       %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>
 // CHECK:       return %[[RESULT]]
 func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
   %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {