[mlir][linalg] Add named ops for depthwise 3d convolution

Also complete the set by adding a variant of depthwise 1-D convolution
with a channel multiplier != 1.

Differential Revision: https://reviews.llvm.org/D127687
Benjamin Kramer 2022-06-13 22:03:56 +02:00
parent 671eb7dc1e
commit ba0222cdc6
3 changed files with 418 additions and 0 deletions
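As an illustration of what the change enables, here is a minimal sketch of the new ops in IR. The value names and tensor shapes are invented for the example and unit strides and dilations are assumed; the 3-D test cases added below also exercise non-unit strides.

%dw1d = linalg.depthwise_conv_1d_nwc_wcm
          {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
          ins(%input1d, %filter1d : tensor<1x12x8xf32>, tensor<3x8x2xf32>)
          outs(%acc1d : tensor<1x10x8x2xf32>) -> tensor<1x10x8x2xf32>

%dw3d = linalg.depthwise_conv_3d_ndhwc_dhwc
          {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
          ins(%input3d, %filter3d : tensor<1x8x8x8x4xf32>, tensor<3x3x3x4xf32>)
          outs(%acc3d : tensor<1x6x6x6x4xf32>) -> tensor<1x6x6x6x4xf32>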


@@ -1827,6 +1827,91 @@ structured_op: !LinalgStructuredOpConfig
- !ScalarExpression
scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
name: depthwise_conv_1d_nwc_wcm
cpp_class_name: DepthwiseConv1DNwcWcmOp
doc: |-
Performs depth-wise 1-D convolution.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
implements:
- LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
args:
- !LinalgOperandDefConfig
name: I
kind: input_tensor
type_var: T1
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1 * s2 + s3 * s4,
s5)>
- !LinalgOperandDefConfig
name: K
kind: input_tensor
type_var: T2
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3, s5, s6)>
- !LinalgOperandDefConfig
name: O
kind: output_tensor
type_var: U
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s5, s6)>
- !LinalgOperandDefConfig
name: strides
kind: index_attr
index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s2)>
default_indices:
- 1
- !LinalgOperandDefConfig
name: dilations
kind: index_attr
index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s4)>
default_indices:
- 1
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1 * s2
+ d4 * s4, d2)>
- affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d4, d2, d3)>
- affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2,
d3)>
iterator_types:
- parallel
- parallel
- parallel
- parallel
- reduction
assignments:
- !ScalarAssign
arg: O
value: !ScalarExpression
scalar_fn:
kind: binary
fn_name: add
operands:
- !ScalarExpression
scalar_arg: O
- !ScalarExpression
scalar_fn:
kind: binary
fn_name: mul
operands:
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: I
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
name: depthwise_conv_2d_nhwc_hwc
cpp_class_name: DepthwiseConv2DNhwcHwcOp
@@ -2276,6 +2361,205 @@ structured_op: !LinalgStructuredOpConfig
- !ScalarExpression
scalar_arg: KZp
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
name: depthwise_conv_3d_ndhwc_dhwc
cpp_class_name: DepthwiseConv3DNdhwcDhwcOp
doc: |-
Performs depth-wise 3-D convolution.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output. Multiplier is set to 1
which is a special case for most depthwise convolutions.
implements:
- LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
args:
- !LinalgOperandDefConfig
name: I
kind: input_tensor
type_var: T1
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
- !LinalgOperandDefConfig
name: K
kind: input_tensor
type_var: T2
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13] -> (s3, s7, s11, s13)>
- !LinalgOperandDefConfig
name: O
kind: output_tensor
type_var: U
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13] -> (s0, s1, s5, s9)>
- !LinalgOperandDefConfig
name: strides
kind: index_attr
index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13] -> (s2, s6, s10)>
default_indices:
- 1
- 1
- 1
- !LinalgOperandDefConfig
name: dilations
kind: index_attr
index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13] -> (s4, s8, s12)>
default_indices:
- 1
- 1
- 1
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3
* s10 + d6 * s12, d7)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13] -> (d4, d5, d6, d7)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d7)>
iterator_types:
- parallel
- parallel
- parallel
- parallel
- reduction
- reduction
- reduction
- parallel
assignments:
- !ScalarAssign
arg: O
value: !ScalarExpression
scalar_fn:
kind: binary
fn_name: add
operands:
- !ScalarExpression
scalar_arg: O
- !ScalarExpression
scalar_fn:
kind: binary
fn_name: mul
operands:
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: I
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
name: depthwise_conv_3d_ndhwc_dhwcm
cpp_class_name: DepthwiseConv3DNdhwcDhwcmOp
doc: |-
Performs depth-wise 3-D convolution.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
implements:
- LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
args:
- !LinalgOperandDefConfig
name: I
kind: input_tensor
type_var: T1
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
s13)>
- !LinalgOperandDefConfig
name: K
kind: input_tensor
type_var: T2
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14] -> (s3, s7, s11, s13, s14)>
- !LinalgOperandDefConfig
name: O
kind: output_tensor
type_var: U
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14] -> (s0, s1, s5, s9, s14)>
- !LinalgOperandDefConfig
name: strides
kind: index_attr
index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13, s14] -> (s2, s6, s10)>
default_indices:
- 1
- 1
- 1
- !LinalgOperandDefConfig
name: dilations
kind: index_attr
index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
s12, s13, s14] -> (s4, s8, s12)>
default_indices:
- 1
- 1
- 1
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
* s8, d3 * s10 + d7 * s12, d8)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
- affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d8, d4)>
iterator_types:
- parallel
- parallel
- parallel
- parallel
- parallel
- reduction
- reduction
- reduction
- parallel
assignments:
- !ScalarAssign
arg: O
value: !ScalarExpression
scalar_fn:
kind: binary
fn_name: add
operands:
- !ScalarExpression
scalar_arg: O
- !ScalarExpression
scalar_fn:
kind: binary
fn_name: mul
operands:
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: I
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
name: pooling_nhwc_sum
cpp_class_name: PoolingNhwcSumOp
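To make the generated maps above concrete: substituting unit strides and dilations for s2, s4, s6, s8, s10 and s12, depthwise_conv_3d_ndhwc_dhwc computes roughly the following linalg.generic. This is only an explanatory sketch with invented value names and shapes, not part of the change; for f32 operands the cast_signed casts are no-ops, leaving a plain multiply-accumulate body.

#map_i = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1 + d4, d2 + d5, d3 + d6, d7)>
#map_k = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d4, d5, d6, d7)>
#map_o = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0, d1, d2, d3, d7)>

%res = linalg.generic
    {indexing_maps = [#map_i, #map_k, #map_o],
     iterator_types = ["parallel", "parallel", "parallel", "parallel",
                       "reduction", "reduction", "reduction", "parallel"]}
    ins(%in, %ker : tensor<1x8x8x8x4xf32>, tensor<3x3x3x4xf32>)
    outs(%acc : tensor<1x6x6x6x4xf32>) {
  ^bb0(%i: f32, %k: f32, %o: f32):
    %m = arith.mulf %i, %k : f32
    %a = arith.addf %o, %m : f32
    linalg.yield %a : f32
} -> tensor<1x6x6x6x4xf32>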


@@ -414,6 +414,26 @@ def depthwise_conv_1d_nwc_wc(I=TensorDef(T1, S.N, S.OW * S.SW + S.KW * S.DW,
TypeFn.cast_signed(U, K[D.kw, D.ic])
@linalg_structured_op
def depthwise_conv_1d_nwc_wcm(I=TensorDef(T1, S.N, S.OW * S.SW + S.KW * S.DW,
S.IC),
K=TensorDef(T2, S.KW, S.IC, S.CM),
O=TensorDef(U, S.N, S.OW, S.IC, S.CM,
output=True),
strides=IndexAttrDef(S.SW, default=[1]),
dilations=IndexAttrDef(S.DW, default=[1])):
"""Performs depth-wise 1-D convolution.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
"""
implements(ConvolutionOpInterface)
domain(D.n, D.ow, D.ic, D.cm, D.kw)
O[D.n, D.ow, D.ic, D.cm] += \
TypeFn.cast_signed(U, I[D.n, D.ow * S.SW + D.kw * S.DW, D.ic]) * \
TypeFn.cast_signed(U, K[D.kw, D.ic, D.cm])
@linalg_structured_op
def depthwise_conv_2d_nhwc_hwc(I=TensorDef(T1, S.N, S.OH * S.SH + S.KH * S.DH,
S.OW * S.SW + S.KW * S.DW, S.IC),
@@ -536,6 +556,64 @@ def depthwise_conv_2d_nhwc_hwcm_q(I=TensorDef(T1, S.N,
TypeFn.cast_signed(U, KZp)))
@linalg_structured_op
def depthwise_conv_3d_ndhwc_dhwc(I=TensorDef(T1, S.N, S.OD * S.SD + S.KD * S.DD,
S.OH * S.SH + S.KH * S.DH,
S.OW * S.SW + S.KW * S.DW, S.IC),
K=TensorDef(T2, S.KD, S.KH, S.KW, S.IC),
O=TensorDef(U, S.N, S.OD, S.OH, S.OW,
output=True),
strides=IndexAttrDef(S.SD,
S.SH,
S.SW,
default=[1, 1, 1]),
dilations=IndexAttrDef(S.DD,
S.DH,
S.DW,
default=[1, 1, 1])):
"""Performs depth-wise 3-D convolution.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output. Multiplier is set to 1
which is a special case for most depthwise convolutions.
"""
implements(ConvolutionOpInterface)
domain(D.n, D.od, D.oh, D.ow, D.kd, D.kh, D.kw, D.ic)
O[D.n, D.od, D.oh, D.ow, D.ic] += TypeFn.cast_signed(
U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH,
D.ow * S.SW + D.kw * S.DW, D.ic]) * TypeFn.cast_signed(
U, K[D.kd, D.kh, D.kw, D.ic])
@linalg_structured_op
def depthwise_conv_3d_ndhwc_dhwcm(I=TensorDef(T1,
S.N, S.OD * S.SD + S.KD * S.DD,
S.OH * S.SH + S.KH * S.DH,
S.OW * S.SW + S.KW * S.DW, S.IC),
K=TensorDef(T2, S.KD, S.KH, S.KW, S.IC, S.CM),
O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.CM,
output=True),
strides=IndexAttrDef(S.SD,
S.SH,
S.SW,
default=[1, 1, 1]),
dilations=IndexAttrDef(S.DD,
S.DH,
S.DW,
default=[1, 1, 1])):
"""Performs depth-wise 3-D convolution.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
"""
implements(ConvolutionOpInterface)
domain(D.n, D.od, D.oh, D.ow, D.cm, D.kd, D.kh, D.kw, D.ic)
O[D.n, D.od, D.oh, D.ow, D.ic, D.cm] += TypeFn.cast_signed(
U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH,
D.ow * S.SW + D.kw * S.DW, D.ic]) * TypeFn.cast_signed(
U, K[D.kd, D.kh, D.kw, D.ic, D.cm])
@linalg_structured_op
def pooling_nhwc_sum(I=TensorDef(T1, S.N, S.OH * S.SH + S.KH * S.DH,
S.OW * S.SW + S.KW * S.DW, S.C),
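As a sanity check on the symbolic shapes used in these definitions (S.OD * S.SD + S.KD * S.DD and friends), the standard convolution output-size relation can be applied to the 3-D test case added below, which uses strides = [2, 1, 3] and unit dilations:

out = floor((in - (k - 1) * dilation - 1) / stride) + 1

OD = floor((6  - (2 - 1) * 1 - 1) / 2) + 1 = 3
OH = floor((13 - (1 - 1) * 1 - 1) / 1) + 1 = 13
OW = floor((12 - (3 - 1) * 1 - 1) / 3) + 1 = 4

which matches the tensor<2x3x13x4x...> result types used in the new tests.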


@@ -1,5 +1,33 @@
// RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s
// CHECK-LABEL: func @depthwise_conv_1d_nwc_wcm
func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<1x12x8xf32>, %filter: tensor<3x8x8xf32>) -> tensor<1x10x8x8xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.init_tensor [1, 10, 8, 8] : tensor<1x10x8x8xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
// CHECK: depthwise_conv_1d_nwc_wcm
%0 = linalg.depthwise_conv_1d_nwc_wcm {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8x8xf32>)
outs(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
return %0 : tensor<1x10x8x8xf32>
}
// -----
// CHECK-LABEL: func @depthwise_conv_1d_nwc_wc
func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor<3x8xf32>) -> tensor<1x10x8xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.init_tensor [1, 10, 8] : tensor<1x10x8xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
// CHECK: depthwise_conv_1d_nwc_wc
%0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8xf32>)
outs(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
return %0 : tensor<1x10x8xf32>
}
// -----
// CHECK-LABEL: func @depthwise_conv_2d_nhwc_hwcm_tensor
func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> {
%zero = arith.constant 0.000000e+00 : f32
@@ -130,6 +158,34 @@ func.func @depthwise_conv_2d_input_nhwc_filter_wrong_stride_size(%input: memref<
// -----
// CHECK-LABEL: func @depthwise_conv_3d_ndhwc_dhwcm
func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.init_tensor [2, 3, 13, 4, 6, 6] : tensor<2x3x13x4x6x6xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
// CHECK: depthwise_conv_3d_ndhwc_dhwcm
%0 = linalg.depthwise_conv_3d_ndhwc_dhwcm {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6x6xf32>)
outs(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
return %0 : tensor<2x3x13x4x6x6xf32>
}
// -----
// CHECK-LABEL: func @depthwise_conv_3d_ndhwc_dhwc
func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6xf32>) -> tensor<2x3x13x4x6xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.init_tensor [2, 3, 13, 4, 6] : tensor<2x3x13x4x6xf32>
%fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
// CHECK: depthwise_conv_3d_ndhwc_dhwc
%0 = linalg.depthwise_conv_3d_ndhwc_dhwc {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6xf32>)
outs(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
return %0 : tensor<2x3x13x4x6xf32>
}
// -----
// CHECK-LABEL: func @conv_1d_nwc_wcf
func.func @conv_1d_nwc_wcf(%input: tensor<?x?x?xf32>, %filter: tensor<?x?x?xf32>, %init: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
// CHECK: %{{.+}} = linalg.conv_1d_nwc_wcf
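The tests above operate on tensors; like the existing depthwise ops, the new ops can also take memref operands, in which case they update the output buffer in place and return no value. A hypothetical buffer-form use (shapes invented for the example):

linalg.depthwise_conv_3d_ndhwc_dhwc
    {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
    ins(%in, %ker : memref<1x8x8x8x4xf32>, memref<3x3x3x4xf32>)
    outs(%out : memref<1x6x6x6x4xf32>)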