[mlir][Linalg] Migrate 2D pooling ops from tc definition to yaml definition.

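This deletes all the pooling ops defined in LinalgNamedStructuredOpsSpec.tc. The YAML-defined polymorphic pooling ops drop their `_poly` suffix and take over the old names (e.g. `pooling_nhwc_max_poly` becomes `pooling_nhwc_max`), and all uses are replaced with the YAML pooling ops. The type-specific `pooling_nhwc_i8_max`, `pooling_nhwc_i16_max` and `pooling_nhwc_i32_max` ops are superseded by the single polymorphic `pooling_nhwc_max`.

Reviewed By: gysit, rsuderman

Differential Revision: https://reviews.llvm.org/D106181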
2021-07-19 09:23:55 -07:00 · 2021-07-19 09:23:55 -07:00 · 9c49195330
parent 59e4e53ead
commit 9c49195330
9 changed files with 56 additions and 325 deletions
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
@ -524,8 +524,8 @@ structured_op: !LinalgStructuredOpConfig
                  scalar_arg: K
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
-  name: pooling_nhwc_sum_poly
-  cpp_class_name: PoolingNhwcSumPolyOp
+  name: pooling_nhwc_sum
+  cpp_class_name: PoolingNhwcSumOp
  doc: |-
    Performs sum pooling.

@ -595,8 +595,8 @@ structured_op: !LinalgStructuredOpConfig
              scalar_arg: I
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
-  name: pooling_nhwc_max_poly
-  cpp_class_name: PoolingNhwcMaxPolyOp
+  name: pooling_nhwc_max
+  cpp_class_name: PoolingNhwcMaxOp
  doc: |-
    Performs max pooling.

@ -666,8 +666,8 @@ structured_op: !LinalgStructuredOpConfig
              scalar_arg: I
 --- !LinalgOpConfig
 metadata: !LinalgOpMetadata
-  name: pooling_nhwc_min_poly
-  cpp_class_name: PoolingNhwcMinPolyOp
+  name: pooling_nhwc_min
+  cpp_class_name: PoolingNhwcMinOp
  doc: |-
    Performs min pooling.

--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc
@ -340,89 +340,3 @@ order of (`N`, `F`, `D`, `H`, `W`, `KD`, `KH`, `KW`, `C`).
                                   w * strides[2] + kw * dilations[2]),
                                 K(kd, kh, kw, c, f)));
 }
-
-ods_def<PoolingNHWCSumFOp>:
-def pooling_nhwc_sum
-    (I: f32(N, H, W, C), K: f32(KH, KW))
-  -> (O: f32(N, OH, OW, C))
-  attr(strides: 2xi64, dilations: 2xi64)
-{
-  O(n, oh, ow, c) = AddFOp<kh, kw>(O(n, oh, ow, c),
-                                     I(n, oh * strides[0] + kh * dilations[0],
-                                       ow * strides[1] + kw * dilations[1], c));
-}
-
-ods_def<PoolingNHWCMaxI8Op>:
-def pooling_nhwc_i8_max
-    (I: i8(N, H, W, C), K: i8(KH, KW))
-  -> (O: i8(N, OH, OW, C))
-  attr(strides: 2xi64, dilations: 2xi64)
-{
-  O(n, oh, ow, c) =
-      SelectOp<kh, kw>(CmpIOpSGT(I(n, oh * strides[0] + kh * dilations[0],
-                                        ow * strides[1] + kw * dilations[1], c),
-                                      O(n, oh, ow, c)),
-                         I(n, oh * strides[0] + kh * dilations[0],
-                           ow * strides[1] + kw * dilations[1], c),
-                         O(n, oh, ow, c));
-}
-
-ods_def<PoolingNHWCMaxI16Op>:
-def pooling_nhwc_i16_max
-    (I: i16(N, H, W, C), K: i16(KH, KW))
-  -> (O: i16(N, OH, OW, C))
-  attr(strides: 2xi64, dilations: 2xi64)
-{
-  O(n, oh, ow, c) =
-      SelectOp<kh, kw>(CmpIOpSGT(I(n, oh * strides[0] + kh * dilations[0],
-                                        ow * strides[1] + kw * dilations[1], c),
-                                      O(n, oh, ow, c)),
-                         I(n, oh * strides[0] + kh * dilations[0],
-                           ow * strides[1] + kw * dilations[1], c),
-                         O(n, oh, ow, c));
-}
-
-ods_def<PoolingNHWCMaxI32Op>:
-def pooling_nhwc_i32_max
-    (I: i32(N, H, W, C), K: i32(KH, KW))
-  -> (O: i32(N, OH, OW, C))
-  attr(strides: 2xi64, dilations: 2xi64)
-{
-  O(n, oh, ow, c) =
-      SelectOp<kh, kw>(CmpIOpSGT(I(n, oh * strides[0] + kh * dilations[0],
-                                        ow * strides[1] + kw * dilations[1], c),
-                                      O(n, oh, ow, c)),
-                         I(n, oh * strides[0] + kh * dilations[0],
-                           ow * strides[1] + kw * dilations[1], c),
-                         O(n, oh, ow, c));
-}
-
-ods_def<PoolingNHWCMaxFOp>:
-def pooling_nhwc_max
-    (I: f32(N, H, W, C), K: f32(KH, KW))
-  -> (O: f32(N, OH, OW, C))
-  attr(strides: 2xi64, dilations: 2xi64)
-{
-  O(n, oh, ow, c) =
-      SelectOp<kh, kw>(CmpFOpOGT(I(n, oh * strides[0] + kh * dilations[0],
-                                        ow * strides[1] + kw * dilations[1], c),
-                                      O(n, oh, ow, c)),
-                         I(n, oh * strides[0] + kh * dilations[0],
-                           ow * strides[1] + kw * dilations[1], c),
-                         O(n, oh, ow, c));
-}
-
-ods_def<PoolingNHWCMinFOp>:
-def pooling_nhwc_min
-    (I: f32(N, H, W, C), K: f32(KH, KW))
-  -> (O: f32(N, OH, OW, C))
-  attr(strides: 2xi64, dilations: 2xi64)
-{
-  O(n, oh, ow, c) =
-      SelectOp<kh, kw>(CmpFOpOLT(I(n, oh * strides[0] + kh * dilations[0],
-                                        ow * strides[1] + kw * dilations[1], c),
-                                      O(n, oh, ow, c)),
-                         I(n, oh * strides[0] + kh * dilations[0],
-                           ow * strides[1] + kw * dilations[1], c),
-                         O(n, oh, ow, c));
-}
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@ -2291,48 +2291,20 @@ public:
    Value fakeWindowDims =
        rewriter.create<linalg::InitTensorOp>(loc, kernel, outElementTy);

-    auto createOp = [&](auto *typePtr) -> linalg::LinalgOp {
-      return cast<linalg::LinalgOp>(
-          rewriter
-              .create<std::remove_pointer_t<decltype(typePtr)>>(
-                  loc, ArrayRef<Type>{resultTy},
-                  ValueRange{paddedInput, fakeWindowDims}, filledInitTensor,
-                  dilationAttr, strideAttr)
-              .getOperation());
-    };
-
-    if (isa<tosa::MaxPool2dOp>(op) && inElementTy.isF32()) {
-      linalg::LinalgOp poolingOp =
-          createOp(static_cast<linalg::PoolingNHWCMaxFOp *>(nullptr));
-      rewriter.replaceOp(op, poolingOp->getResult(0));
-      return success();
-    }
-
-    if (isa<tosa::MaxPool2dOp>(op) && inElementTy.isInteger(8)) {
-      linalg::LinalgOp poolingOp =
-          createOp(static_cast<linalg::PoolingNHWCMaxI8Op *>(nullptr));
-      rewriter.replaceOp(op, poolingOp->getResult(0));
-      return success();
-    }
-
-    if (isa<tosa::MaxPool2dOp>(op) && inElementTy.isInteger(16)) {
-      linalg::LinalgOp poolingOp =
-          createOp(static_cast<linalg::PoolingNHWCMaxI16Op *>(nullptr));
-      rewriter.replaceOp(op, poolingOp->getResult(0));
-      return success();
-    }
-
-    if (isa<tosa::MaxPool2dOp>(op) && inElementTy.isInteger(32)) {
-      linalg::LinalgOp poolingOp =
-          createOp(static_cast<linalg::PoolingNHWCMaxI32Op *>(nullptr));
-      rewriter.replaceOp(op, poolingOp->getResult(0));
+    if (isa<tosa::MaxPool2dOp>(op)) {
+      rewriter.replaceOpWithNewOp<linalg::PoolingNhwcMaxOp>(
+          op, ArrayRef<Type>{resultTy}, ValueRange{paddedInput, fakeWindowDims},
+          filledInitTensor, strideAttr, dilationAttr);
      return success();
    }

    if (isa<tosa::AvgPool2dOp>(op) && inElementTy.isF32()) {
-      Value poolingOp =
-          createOp(static_cast<linalg::PoolingNHWCSumFOp *>(nullptr))
-              ->getResult(0);
+      Value poolingOp = rewriter
+                            .create<linalg::PoolingNhwcSumOp>(
+                                loc, ArrayRef<Type>{resultTy},
+                                ValueRange{paddedInput, fakeWindowDims},
+                                filledInitTensor, strideAttr, dilationAttr)
+                            .getResult(0);
      auto poolingOpTy = poolingOp.getType().cast<ShapedType>();
      auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank());
      auto genericOp = rewriter.create<linalg::GenericOp>(
--- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
@ -132,7 +132,7 @@ def depthwise_conv_2d_input_nhwc_filter_hwc_poly(


@linalg_structured_op
-def pooling_nhwc_sum_poly(
+def pooling_nhwc_sum(
    I=TensorDef(T1, S.N, S.H, S.W, S.C),
    K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]),
    O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True),
@ -149,7 +149,7 @@ def pooling_nhwc_sum_poly(


@linalg_structured_op
-def pooling_nhwc_max_poly(
+def pooling_nhwc_max(
    I=TensorDef(T1, S.N, S.H, S.W, S.C),
    K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]),
    O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True),
@ -167,7 +167,7 @@ def pooling_nhwc_max_poly(


@linalg_structured_op
-def pooling_nhwc_min_poly(
+def pooling_nhwc_min(
    I=TensorDef(T1, S.N, S.H, S.W, S.C),
    K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]),
    O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True),
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@ -1064,7 +1064,7 @@ func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () {
 // CHECK-LABEL: @max_pool_i8
 func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () {
  // CHECK: constant -128
-  // CHECK: linalg.pooling_nhwc_i8_max
+  // CHECK: linalg.pooling_nhwc_max
  %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>)  -> (tensor<1x4x32x62xi8>)
  return
 }
@ -1072,7 +1072,7 @@ func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () {
 // CHECK-LABEL: @max_pool_i16
 func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () {
  // CHECK: constant -32768
-  // CHECK: linalg.pooling_nhwc_i16_max
+  // CHECK: linalg.pooling_nhwc_max
  %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>)  -> (tensor<1x4x32x62xi16>)
  return
 }
@ -1080,7 +1080,7 @@ func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () {
 // CHECK-LABEL: @max_pool_i32
 func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () {
  // CHECK: constant -2147483648
-  // CHECK: linalg.pooling_nhwc_i32_max
+  // CHECK: linalg.pooling_nhwc_max
  %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>)  -> (tensor<1x4x32x62xi32>)
  return
 }
--- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir
@ -316,161 +316,6 @@ func @conv_3d_input_ncdhw_filter_dhwcf(%input: memref<?x?x?x?x?xf32>, %filter: m

 // -----

-func @pooling_nhwc_sum(%input: memref<?x?x?x?xf32>, %fake: memref<2x3xf32>, %init: memref<?x?x?x?xf32>) {
-  linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
-    ins(%input, %fake: memref<?x?x?x?xf32>, memref<2x3xf32>)
-    outs(%init: memref<?x?x?x?xf32>)
-  return
-}
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 + d4, d2 + d5, d3)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
-
-// CHECK: func @pooling_nhwc_sum
-
-// CHECK: linalg.generic
-// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
-// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<?x?x?x?xf32>, memref<2x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<?x?x?x?xf32>)
-
-// CHECK:         ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32)
-// CHECK-NEXT:      %[[RES:.+]] = addf %[[BBARG2]], %[[BBARG0]] : f32
-// CHECK-NEXT:      linalg.yield %[[RES]] : f32
-
-// -----
-
-func @pooling_nhwc_max(%input: memref<?x?x?x?xf32>, %fake: memref<2x3xf32>, %init: memref<?x?x?x?xf32>) {
-  linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>}
-    ins(%input, %fake: memref<?x?x?x?xf32>, memref<2x3xf32>)
-    outs(%init: memref<?x?x?x?xf32>)
-  return
-}
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d4, d2 * 3 + d5, d3)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
-
-// CHECK: func @pooling_nhwc_max
-
-// CHECK: linalg.generic
-// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
-// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<?x?x?x?xf32>, memref<2x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<?x?x?x?xf32>)
-
-// CHECK:         ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32)
-// CHECK-NEXT:      %[[CMP:.+]] = cmpf ogt, %[[BBARG0]], %[[BBARG2]] : f32
-// CHECK-NEXT:      %[[RES:.+]] = select %[[CMP]], %[[BBARG0]], %[[BBARG2]] : f32
-// CHECK-NEXT:      linalg.yield %[[RES]] : f32
-
-// -----
-
-func @pooling_nhwc_i8_max(%input: memref<?x?x?x?xi8>, %fake: memref<2x3xi8>, %init: memref<?x?x?x?xi8>) {
-  linalg.pooling_nhwc_i8_max {dilations = dense<1> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>}
-    ins(%input, %fake: memref<?x?x?x?xi8>, memref<2x3xi8>)
-    outs(%init: memref<?x?x?x?xi8>)
-  return
-}
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d4, d2 * 3 + d5, d3)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
-
-// CHECK: func @pooling_nhwc_i8_max
-
-// CHECK: linalg.generic
-// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
-// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<?x?x?x?xi8>, memref<2x3xi8>)
-// CHECK-SAME: outs(%{{.+}} : memref<?x?x?x?xi8>)
-
-// CHECK:         ^{{.+}}(%[[BBARG0:.+]]: i8, %[[BBARG1:.+]]: i8, %[[BBARG2:.+]]: i8)
-// CHECK-NEXT:      %[[CMP:.+]] = cmpi sgt, %[[BBARG0]], %[[BBARG2]] : i8
-// CHECK-NEXT:      %[[RES:.+]] = select %[[CMP]], %[[BBARG0]], %[[BBARG2]] : i8
-// CHECK-NEXT:      linalg.yield %[[RES]] : i8
-
-// -----
-
-func @pooling_nhwc_i16_max(%input: memref<?x?x?x?xi16>, %fake: memref<2x3xi16>, %init: memref<?x?x?x?xi16>) {
-  linalg.pooling_nhwc_i16_max {dilations = dense<1> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>}
-    ins(%input, %fake: memref<?x?x?x?xi16>, memref<2x3xi16>)
-    outs(%init: memref<?x?x?x?xi16>)
-  return
-}
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d4, d2 * 3 + d5, d3)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
-
-// CHECK: func @pooling_nhwc_i16_max
-
-// CHECK: linalg.generic
-// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
-// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<?x?x?x?xi16>, memref<2x3xi16>)
-// CHECK-SAME: outs(%{{.+}} : memref<?x?x?x?xi16>)
-
-// CHECK:         ^{{.+}}(%[[BBARG0:.+]]: i16, %[[BBARG1:.+]]: i16, %[[BBARG2:.+]]: i16)
-// CHECK-NEXT:      %[[CMP:.+]] = cmpi sgt, %[[BBARG0]], %[[BBARG2]] : i16
-// CHECK-NEXT:      %[[RES:.+]] = select %[[CMP]], %[[BBARG0]], %[[BBARG2]] : i16
-// CHECK-NEXT:      linalg.yield %[[RES]] : i16
-
-// -----
-
-func @pooling_nhwc_i32_max(%input: memref<?x?x?x?xi32>, %fake: memref<2x3xi32>, %init: memref<?x?x?x?xi32>) {
-  linalg.pooling_nhwc_i32_max {dilations = dense<1> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>}
-    ins(%input, %fake: memref<?x?x?x?xi32>, memref<2x3xi32>)
-    outs(%init: memref<?x?x?x?xi32>)
-  return
-}
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d4, d2 * 3 + d5, d3)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
-
-// CHECK: func @pooling_nhwc_i32_max
-
-// CHECK: linalg.generic
-// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
-// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<?x?x?x?xi32>, memref<2x3xi32>)
-// CHECK-SAME: outs(%{{.+}} : memref<?x?x?x?xi32>)
-
-// CHECK:         ^{{.+}}(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: i32, %[[BBARG2:.+]]: i32)
-// CHECK-NEXT:      %[[CMP:.+]] = cmpi sgt, %[[BBARG0]], %[[BBARG2]] : i32
-// CHECK-NEXT:      %[[RES:.+]] = select %[[CMP]], %[[BBARG0]], %[[BBARG2]] : i32
-// CHECK-NEXT:      linalg.yield %[[RES]] : i32
-
-// -----
-
-func @pooling_nhwc_min(%input: memref<?x?x?x?xf32>, %fake: memref<2x3xf32>, %init: memref<?x?x?x?xf32>) {
-  linalg.pooling_nhwc_min {dilations = dense<3> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
-    ins(%input, %fake: memref<?x?x?x?xf32>, memref<2x3xf32>)
-    outs(%init: memref<?x?x?x?xf32>)
-  return
-}
-
-// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d4 * 3, d2 * 2 + d5 * 3, d3)>
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
-
-// CHECK: func @pooling_nhwc_min
-
-// CHECK: linalg.generic
-// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]}
-// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<?x?x?x?xf32>, memref<2x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<?x?x?x?xf32>)
-
-// CHECK:         ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32)
-// CHECK-NEXT:      %[[CMP:.+]] = cmpf olt, %[[BBARG0]], %[[BBARG2]] : f32
-// CHECK-NEXT:      %[[RES:.+]] = select %[[CMP]], %[[BBARG0]], %[[BBARG2]] : f32
-// CHECK-NEXT:      linalg.yield %[[RES]] : f32
-
-// -----
-
 func @generalize_fill(%output: memref<?x?xf32>, %value : f32) {
  linalg.fill(%value, %output) : f32, memref<?x?xf32>
  return
--- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir
@ -60,13 +60,13 @@ func @generalize_depthwise_conv_2d_input_nhwc_filter_hwc_poly_i32(%input : tenso

 // -----

-func @generalize_pooling_nhwc_max_poly_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
-  %0 = linalg.pooling_nhwc_max_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
+func @generalize_pooling_nhwc_max_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
+  %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32>
  return %0: tensor<1x2x4x1xf32>
 }

-// CHECK-LABEL: @generalize_pooling_nhwc_max_poly_f32
+// CHECK-LABEL: @generalize_pooling_nhwc_max_f32
 // CHECK:      ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32)
 // CHECK-NEXT:   %[[COND:.+]] = cmpf ogt, %[[OUT_ARG]], %[[IN_ARG]] : f32
 // CHECK-NEXT:   %[[MAX:.+]] = select %[[COND]], %[[OUT_ARG]], %[[IN_ARG]] : f32
@ -75,13 +75,13 @@ func @generalize_pooling_nhwc_max_poly_f32(%input : tensor<1x4x16x1xf32>, %shape

 // -----

-func @generalize_pooling_nhwc_max_poly_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
-  %0 = linalg.pooling_nhwc_max_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
+func @generalize_pooling_nhwc_max_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
+  %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32>
  return %0: tensor<1x2x4x1xi32>
 }

-// CHECK-LABEL: @generalize_pooling_nhwc_max_poly_i32
+// CHECK-LABEL: @generalize_pooling_nhwc_max_i32
 // CHECK:      ^{{.*}}(%[[IN_ARG:.+]]: i32, %[[SHAPE_ARG:.+]]: i32, %[[OUT_ARG:.+]]: i32)
 // CHECK-NEXT:   %[[COND:.+]] = cmpi sgt, %[[OUT_ARG]], %[[IN_ARG]] : i32
 // CHECK-NEXT:   %[[MAX:.+]] = select %[[COND]], %[[OUT_ARG]], %[[IN_ARG]] : i32
@ -90,13 +90,13 @@ func @generalize_pooling_nhwc_max_poly_i32(%input : tensor<1x4x16x1xi32>, %shape

 // -----

-func @generalize_pooling_nhwc_min_poly_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
-  %0 = linalg.pooling_nhwc_min_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
+func @generalize_pooling_nhwc_min_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
+  %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32>
  return %0: tensor<1x2x4x1xf32>
 }

-// CHECK-LABEL: @generalize_pooling_nhwc_min_poly_f32
+// CHECK-LABEL: @generalize_pooling_nhwc_min_f32
 // CHECK:      ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32)
 // CHECK-NEXT:   %[[COND:.+]] = cmpf olt, %[[OUT_ARG]], %[[IN_ARG]] : f32
 // CHECK-NEXT:   %[[MAX:.+]] = select %[[COND]], %[[OUT_ARG]], %[[IN_ARG]] : f32
@ -105,13 +105,13 @@ func @generalize_pooling_nhwc_min_poly_f32(%input : tensor<1x4x16x1xf32>, %shape

 // -----

-func @generalize_pooling_nhwc_min_poly_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
-  %0 = linalg.pooling_nhwc_min_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
+func @generalize_pooling_nhwc_min_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
+  %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32>
  return %0: tensor<1x2x4x1xi32>
 }

-// CHECK-LABEL: @generalize_pooling_nhwc_min_poly_i32
+// CHECK-LABEL: @generalize_pooling_nhwc_min_i32
 // CHECK:      ^{{.*}}(%[[IN_ARG:.+]]: i32, %[[SHAPE_ARG:.+]]: i32, %[[OUT_ARG:.+]]: i32)
 // CHECK-NEXT:   %[[COND:.+]] = cmpi slt, %[[OUT_ARG]], %[[IN_ARG]] : i32
 // CHECK-NEXT:   %[[MAX:.+]] = select %[[COND]], %[[OUT_ARG]], %[[IN_ARG]] : i32
@ -120,13 +120,13 @@ func @generalize_pooling_nhwc_min_poly_i32(%input : tensor<1x4x16x1xi32>, %shape

 // -----

-func @generalize_pooling_nhwc_sum_poly_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
-  %0 = linalg.pooling_nhwc_sum_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
+func @generalize_pooling_nhwc_sum_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> {
+  %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
    ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32>
  return %0: tensor<1x2x4x1xf32>
 }

-// CHECK-LABEL: @generalize_pooling_nhwc_sum_poly_f32
+// CHECK-LABEL: @generalize_pooling_nhwc_sum_f32
 // CHECK:      ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32)
 // CHECK-NEXT:   %[[ADD:.+]] = addf %[[OUT_ARG]], %[[IN_ARG]] : f32
 // CHECK-NEXT:   linalg.yield %[[ADD]] : f32
@ -134,13 +134,13 @@ func @generalize_pooling_nhwc_sum_poly_f32(%input : tensor<1x4x16x1xf32>, %shape

 // -----

-func @generalize_pooling_nhwc_sum_poly_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
-  %0 = linalg.pooling_nhwc_sum_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
+func @generalize_pooling_nhwc_sum_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> {
+  %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>}
    ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32>
  return %0: tensor<1x2x4x1xi32>
 }

-// CHECK-LABEL: @generalize_pooling_nhwc_sum_poly_i32
+// CHECK-LABEL: @generalize_pooling_nhwc_sum_i32
 // CHECK:      ^{{.*}}(%[[IN_ARG:.+]]: i32, %[[SHAPE_ARG:.+]]: i32, %[[OUT_ARG:.+]]: i32)
 // CHECK-NEXT:   %[[ADD:.+]] = addi %[[OUT_ARG]], %[[IN_ARG]] : i32
 // CHECK-NEXT:   linalg.yield %[[ADD]] : i32
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@ -385,7 +385,7 @@ func @pooling_nhwc_max(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %out
 // -----

 // CHECK-LABEL: func @pooling_nhwc_i8_max_tensor
-// CHECK:         %{{.+}} = linalg.pooling_nhwc_i8_max
+// CHECK:         %{{.+}} = linalg.pooling_nhwc_max
 // CHECK-SAME:      dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:      strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi8>, tensor<3x3xi8>)
@ -395,7 +395,7 @@ func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi
  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi8>
  %cst = constant 0 : i8
  %fill = linalg.fill(%cst, %init) : i8, tensor<1x2x2x1xi8> -> tensor<1x2x2x1xi8>
-  %res = linalg.pooling_nhwc_i8_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+  %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
    ins(%input, %fake: tensor<1x4x4x1xi8>, tensor<3x3xi8>)
    outs(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
  return %res : tensor<1x2x2x1xi8>
@ -404,13 +404,13 @@ func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi
 // -----

 // CHECK-LABEL: func @pooling_nhwc_i8_max
-// CHECK:         linalg.pooling_nhwc_i8_max
+// CHECK:         linalg.pooling_nhwc_max
 // CHECK-SAME:      dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:      strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi8>, memref<3x3xi8>)
 // CHECK-SAME:      outs(%{{.+}} : memref<1x2x2x1xi8>)
 func @pooling_nhwc_i8_max(%input: memref<1x4x4x1xi8>, %fake: memref<3x3xi8>, %output: memref<1x2x2x1xi8>) {
-  linalg.pooling_nhwc_i8_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+  linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
    ins(%input, %fake: memref<1x4x4x1xi8>, memref<3x3xi8>)
    outs(%output: memref<1x2x2x1xi8>)
  return
@ -419,7 +419,7 @@ func @pooling_nhwc_i8_max(%input: memref<1x4x4x1xi8>, %fake: memref<3x3xi8>, %ou
 // -----

 // CHECK-LABEL: func @pooling_nhwc_i16_max_tensor
-// CHECK:         %{{.+}} = linalg.pooling_nhwc_i16_max
+// CHECK:         %{{.+}} = linalg.pooling_nhwc_max
 // CHECK-SAME:      dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:      strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi16>, tensor<3x3xi16>)
@ -429,7 +429,7 @@ func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1
  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi16>
  %cst = constant 0 : i16
  %fill = linalg.fill(%cst, %init) : i16, tensor<1x2x2x1xi16> -> tensor<1x2x2x1xi16>
-  %res = linalg.pooling_nhwc_i16_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+  %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
    ins(%input, %fake: tensor<1x4x4x1xi16>, tensor<3x3xi16>)
    outs(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
  return %res : tensor<1x2x2x1xi16>
@ -438,13 +438,13 @@ func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1
 // -----

 // CHECK-LABEL: func @pooling_nhwc_i16_max
-// CHECK:         linalg.pooling_nhwc_i16_max
+// CHECK:         linalg.pooling_nhwc_max
 // CHECK-SAME:      dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:      strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi16>, memref<3x3xi16>)
 // CHECK-SAME:      outs(%{{.+}} : memref<1x2x2x1xi16>)
 func @pooling_nhwc_i16_max(%input: memref<1x4x4x1xi16>, %fake: memref<3x3xi16>, %output: memref<1x2x2x1xi16>) {
-  linalg.pooling_nhwc_i16_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+  linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
    ins(%input, %fake: memref<1x4x4x1xi16>, memref<3x3xi16>)
    outs(%output: memref<1x2x2x1xi16>)
  return
@ -453,7 +453,7 @@ func @pooling_nhwc_i16_max(%input: memref<1x4x4x1xi16>, %fake: memref<3x3xi16>,
 // -----

 // CHECK-LABEL: func @pooling_nhwc_i32_max_tensor
-// CHECK:         %{{.+}} = linalg.pooling_nhwc_i32_max
+// CHECK:         %{{.+}} = linalg.pooling_nhwc_max
 // CHECK-SAME:      dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:      strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi32>, tensor<3x3xi32>)
@ -463,7 +463,7 @@ func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1
  %init = linalg.init_tensor [1, 2, 2, 1] : tensor<1x2x2x1xi32>
  %cst = constant 0 : i32
  %fill = linalg.fill(%cst, %init) : i32, tensor<1x2x2x1xi32> -> tensor<1x2x2x1xi32>
-  %res = linalg.pooling_nhwc_i32_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+  %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
    ins(%input, %fake: tensor<1x4x4x1xi32>, tensor<3x3xi32>)
    outs(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
  return %res : tensor<1x2x2x1xi32>
@ -472,13 +472,13 @@ func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1
 // -----

 // CHECK-LABEL: func @pooling_nhwc_i32_max
-// CHECK:         linalg.pooling_nhwc_i32_max
+// CHECK:         linalg.pooling_nhwc_max
 // CHECK-SAME:      dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:      strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:      ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi32>, memref<3x3xi32>)
 // CHECK-SAME:      outs(%{{.+}} : memref<1x2x2x1xi32>)
 func @pooling_nhwc_i32_max(%input: memref<1x4x4x1xi32>, %fake: memref<3x3xi32>, %output: memref<1x2x2x1xi32>) {
-  linalg.pooling_nhwc_i32_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
+  linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
    ins(%input, %fake: memref<1x4x4x1xi32>, memref<3x3xi32>)
    outs(%output: memref<1x2x2x1xi32>)
  return
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@ -318,7 +318,7 @@ def test_max_pooling_builtin():
          MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
          MemRefType.get((1, 2, 4, 1), i32))
      def pooling_on_buffers(input, shape, output):
-        linalg.pooling_nhwc_max_poly(
+        linalg.pooling_nhwc_max(
            input, shape, outs=[output], strides=[2, 4], dilations=[1, 2])

    execution_engine = ExecutionEngine(transform(module, pooling_boiler))
@ -349,7 +349,7 @@ def test_max_pooling_generic():
          MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
          MemRefType.get((1, 2, 4, 1), i32))
      def pooling_on_buffers(input, shape, output):
-        linalg.pooling_nhwc_max_poly(
+        linalg.pooling_nhwc_max(
            input,
            shape,
            outs=[output],
@ -385,7 +385,7 @@ def test_min_pooling_builtin():
          MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
          MemRefType.get((1, 2, 4, 1), i32))
      def pooling_on_buffers(input, shape, output):
-        linalg.pooling_nhwc_min_poly(
+        linalg.pooling_nhwc_min(
            input, shape, outs=[output], strides=[2, 4], dilations=[1, 2])

    execution_engine = ExecutionEngine(transform(module, pooling_boiler))
@ -415,7 +415,7 @@ def test_min_pooling_generic():
          MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
          MemRefType.get((1, 2, 4, 1), i32))
      def pooling_on_buffers(input, shape, output):
-        linalg.pooling_nhwc_min_poly(
+        linalg.pooling_nhwc_min(
            input,
            shape,
            outs=[output],