!14084 [GraphKernel] support matmul on D

From: @lingyunli63 Reviewed-by: Signed-off-by:
2021-03-27 09:26:55 +08:00 · 2021-03-27 09:26:55 +08:00 · ad140a8bf4
parent 44d6da63c2 4b966ed40d
commit ad140a8bf4
6 changed files with 109 additions and 7 deletions
--- a/mindspore/_extends/graph_kernel/model/graph_split.py
+++ b/mindspore/_extends/graph_kernel/model/graph_split.py
@ -466,6 +466,8 @@ class GraphSplitAscend(GraphSplitByPattern):
    REDUCE_FUSE_DEPTH = 10

    def get_default_mode(self, op):
+        if op.prim == "MatMul":
+            return self.Area.MODE_COMPOSITE if op.inputs[0].dtype == "float16" else self.Area.MODE_BASIC
        if op.prim in ("Tile", "BroadcastTo"):
            return self.Area.MODE_COMPOSITE
        return self.Area.MODE_BASIC
--- a/mindspore/_extends/graph_kernel/model/model.py
+++ b/mindspore/_extends/graph_kernel/model/model.py
@ -88,8 +88,7 @@ class PrimLib:
    ELEMWISE = 2
    BROADCAST = 3
    REDUCE = 4
-    TRANSFORM = 5
-    CONTROL = 6
+    OPAQUE = 5

    class Prim:
        """Prim"""
@ -146,7 +145,6 @@ class PrimLib:
            default_elemwise_broadcast_relation,
            default_reduce_relation,
            unknown_relation,
-            unknown_relation,
        ]

    primtives = {
@ -176,7 +174,6 @@ class PrimLib:
        'ReduceSum': Prim(REDUCE),
        'ReduceMax': Prim(REDUCE),
        'ReduceMin': Prim(REDUCE),
-        'MakeTuple': Prim(CONTROL),
        'Assign': Prim(ELEMWISE),
        'Tanh': Prim(ELEMWISE),
        'ExpandDims': Prim(RESHAPE),
@ -186,9 +183,10 @@ class PrimLib:
        'Squeeze': Prim(RESHAPE),
        'Flatten': Prim(RESHAPE),
        'FlattenGrad': Prim(RESHAPE),
-        'Transpose': Prim(TRANSFORM),
+        'Transpose': Prim(OPAQUE),
        'Tile': Prim(BROADCAST),
        'BroadcastTo': Prim(BROADCAST),
+        'MatMul': Prim(OPAQUE),
    }

    default_primtive = Prim(UNKNOWN)
@ -509,7 +507,7 @@ class AddControlBuddy(GraphVisitor):
        self.buddies = {}  # {op : [ctrl_op]}

    def visit(self, op):
-        if PrimLib.iter_type(op) == PrimLib.CONTROL:
+        if op.prim == "MakeTuple":
            assert len(op.output.to_ops) == 1
            owner = op.output.to_ops[0]
            if owner in self.buddies:
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_attrs_process.cc
@ -177,6 +177,18 @@ void SetAkgAttrsForBN2Relu(const AnfNodePtr &anf_node) {
  AnfAlgo::SetNodeAttr(kAttrOutputNames, MakeValue(bn2_output_names), anf_node);
 }

+void SetAkgAttrsForMatMul(const AnfNodePtr &anf_node) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  std::string dst_type;
+  TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
+  dst_type = TypeId2String(output_type);
+  AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node);
+  auto left_format = AnfAlgo::GetInputFormat(anf_node, 0);
+  auto right_format = AnfAlgo::GetInputFormat(anf_node, 1);
+  AnfAlgo::SetNodeAttr("left_format", MakeValue(left_format), anf_node);
+  AnfAlgo::SetNodeAttr("right_format", MakeValue(right_format), anf_node);
+}
+
 const std::unordered_map<std::string, std::function<void(const AnfNodePtr &anf_node)>> kAkgKernelAttrsProcessMap = {
  {kFour2FiveOpName, SetAkgAttrsForFour2Five},
  {kFive2FourOpName, SetAkgAttrsForFive2Four},
@ -190,6 +202,7 @@ const std::unordered_map<std::string, std::function<void(const AnfNodePtr &anf_n
  {kConvBN1OpName, SetAkgAttrsForConvBN1},
  {kBN2AddReluOpName, SetAkgAttrsForBN2AddRelu},
  {kBN2ReLUOpName, SetAkgAttrsForBN2Relu},
+  {kMatMulOpName, SetAkgAttrsForMatMul},
 };
 }  // namespace

--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
@ -575,7 +575,7 @@ std::vector<PrimitivePtr> GetFusibleOpList() {
    prim::kPrimExpandDims, prim::kPrimMul,        prim::kPrimMinimum, prim::kPrimMaximum, prim::kPrimLog,
    prim::kPrimPow,        prim::kPrimSub,        prim::kPrimRsqrt,   prim::kPrimSqrt,    prim::kPrimAddN,
    prim::kPrimEqual,      prim::kPrimReciprocal, prim::kPrimTanh,    prim::kPrimReshape, prim::kPrimTranspose,
-    prim::kPrimCast,       prim::kPrimRealDiv};
+    prim::kPrimCast,       prim::kPrimRealDiv,    prim::kPrimMatMul};
 #elif ENABLE_GPU
  std::vector<PrimitivePtr> fusible_basic_ops = {
    prim::kPrimAbs,     prim::kPrimRound,      prim::kPrimNeg,       prim::kPrimExp,     prim::kPrimAdd,
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@ -265,6 +265,7 @@ constexpr auto kSGDName = "SGD";
 constexpr auto kLARSUpdateName = "LARSUpdate";
 constexpr auto kBasicLSTMCellCStateGradOpName = "BasicLSTMCellCStateGrad";
 constexpr auto kBasicLSTMCellCStateGradV2OpName = "BasicLSTMCellCStateGradV2";
+constexpr auto kMatMulOpName = "MatMul";
 constexpr auto kMatMulV2OpName = "MatMulV2";
 constexpr auto kBroadcastToOpName = "BroadcastTo";
 constexpr auto kFusedAddReluV2Name = "FusedAddReluV2";
--- a/tests/st/ops/graph_kernel/test_matmul.py
+++ b/tests/st/ops/graph_kernel/test_matmul.py
@ -0,0 +1,88 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+import pytest
+import mindspore.context as context
+from mindspore import Tensor
+from mindspore.nn import Cell
+import mindspore.ops.operations as P
+
+class Net(Cell):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.matmul = P.MatMul(transpose_a=True, transpose_b=True)
+
+    def construct(self, x, y):
+        return self.matmul(x, y)
+
+class Net1(Cell):
+    def __init__(self):
+        super(Net1, self).__init__()
+        self.matmul = P.MatMul(transpose_a=True, transpose_b=True)
+        self.add = P.BiasAdd()
+
+    def construct(self, x, y, bias):
+        res = self.matmul(x, y)
+        return self.add(res, bias)
+
+def get_output(i0, i1, enable_graph_kernel=False):
+    if enable_graph_kernel:
+        context.set_context(enable_graph_kernel=True, save_graphs=False)
+    net = Net()
+    output = net(i0, i1)
+    return output
+
+def get_output1(i0, i1, i2, enable_graph_kernel=False):
+    if enable_graph_kernel:
+        context.set_context(enable_graph_kernel=True, save_graphs=False)
+    net = Net1()
+    output = net(i0, i1, i2)
+    return output
+
+def test_basic():
+    i0 = Tensor(np.random.normal(1, 0.01, [800, 96]).astype(np.float16))
+    i1 = Tensor(np.random.normal(1, 0.01, [128, 800]).astype(np.float16))
+    expect = get_output(i0, i1, False)
+    output = get_output(i0, i1, True)
+    expect_np = expect.asnumpy().copy()
+    output_np = output.asnumpy().copy()
+    assert np.allclose(expect_np, output_np, 1.e-4, 1.e-7)
+
+def test_basic1():
+    i0 = Tensor(np.random.normal(1, 0.01, [800, 96]).astype(np.float16))
+    i1 = Tensor(np.random.normal(1, 0.01, [128, 800]).astype(np.float16))
+    i2 = Tensor(np.random.normal(100, 0.01, [128,]).astype(np.float16))
+    expect = get_output1(i0, i1, i2, False)
+    output = get_output1(i0, i1, i2, True)
+    expect_np = expect.asnumpy().copy()
+    output_np = output.asnumpy().copy()
+    assert np.allclose(expect_np, output_np, 6.e-4, 6.e-4)
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_basic_ascend():
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    test_basic()
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_basic_ascend1():
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    test_basic1()