Reorder mmt4d r.h.s operand layout

Switch r.h.s operand layout (n1, k1, n0, k0) -> (n1, k1, k0, n0)
which is more consistent with scalar-vector products vectorization
and eliminates operand transpose.

Reviewed By: rsuderman

Differential Revision: https://reviews.llvm.org/D107307
This commit is contained in:
Ahmed Taei 2021-08-02 13:26:03 -07:00
parent 24b0df8686
commit 53d6988171
2 changed files with 5 additions and 5 deletions

View File

@ -180,7 +180,7 @@ structured_op: !LinalgStructuredOpConfig
name: rhs
usage: InputOperand
type_var: RhsType
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s5, s3)>
shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s3, s5)>
- !LinalgOperandDefConfig
name: accum
usage: OutputOperand
@ -190,8 +190,8 @@ structured_op: !LinalgStructuredOpConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d4, d2,
d5)>
- affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d1, d4, d3,
d5)>
- affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d1, d4, d5,
d3)>
- affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2,
d3)>
iterator_types:

View File

@ -39,7 +39,7 @@ def quantized_matmul(
@linalg_structured_op
def mmt4d(lhs=TensorDef(TV.LhsType, S.M, S.K, S.M0, S.K0),
rhs=TensorDef(TV.RhsType, S.N, S.K, S.N0, S.K0),
rhs=TensorDef(TV.RhsType, S.N, S.K, S.K0, S.N0),
accum=TensorDef(TV.AccumType, S.M, S.N, S.M0, S.N0,
output=True)):
"""Performs a matrix-matrix-transpose multiplication of two 4D inputs.
@ -54,7 +54,7 @@ def mmt4d(lhs=TensorDef(TV.LhsType, S.M, S.K, S.M0, S.K0),
"""
domain(D.m, D.n, D.m0, D.n0, D.k, D.k0)
implements(ContractionOpInterface)
accum[D.m, D.n, D.m0, D.n0] += cast(TV.AccumType, lhs[D.m, D.k, D.m0, D.k0]) * cast(TV.AccumType, rhs[D.n, D.k, D.n0, D.k0])
accum[D.m, D.n, D.m0, D.n0] += cast(TV.AccumType, lhs[D.m, D.k, D.m0, D.k0]) * cast(TV.AccumType, rhs[D.n, D.k, D.k0, D.n0])
@linalg_structured_op
def batch_matmul(