From a7d89f66865cdc7743c5238c012f5c3644a60d71 Mon Sep 17 00:00:00 2001
From: Zichun Ye <zichun.ye@huawei.com>
Date: Wed, 25 Aug 2021 10:41:13 +0800
Subject: [PATCH] add graph kernel userdefine op support

fix code check
---
 .../_extends/graph_kernel/model/model.py      |   1 +
 .../graph_kernel/graph_kernel_cluster.cc      |   1 +
 .../graph_kernel/graph_kernel_cluster.h       |   4 +
 .../runtime/device/gpu/kernel_info_setter.cc  |   7 +-
 tests/st/ops/graph_kernel/test_user_define.py | 135 ++++++++++++++++++
 5 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 tests/st/ops/graph_kernel/test_user_define.py
diff --git a/mindspore/_extends/graph_kernel/model/model.py b/mindspore/_extends/graph_kernel/model/model.py
index c64aefc8713..5ebc4c59306 100644
--- a/mindspore/_extends/graph_kernel/model/model.py
+++ b/mindspore/_extends/graph_kernel/model/model.py
@@ -234,6 +234,7 @@ class PrimLib:
         'Gather': Prim(OPAQUE),
         'GatherNd': Prim(OPAQUE),
         'UnsortedSegmentSum': Prim(OPAQUE),
+        'UserDefined': Prim(OPAQUE),
     }
 
     default_primtive = Prim(UNKNOWN)
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.cc
index cd129b72fc7..38a9d945ba1 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.cc
@@ -100,6 +100,7 @@ std::vector<PrimitivePtr> GetClusterableOpList() {
     prim::kPrimSign,
     prim::kPrimSin,
     prim::kPrimStridedSlice,
+    prim::kPrimUserDefined,
 #endif
   };
   const auto &flags = context::GraphKernelFlags::GetInstance();
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.h
index 0957ce43fac..bf65a5cc540 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cluster.h
@@ -26,6 +26,10 @@
 #include "backend/optimizer/common/optimizer.h"
 
 namespace mindspore {
+namespace prim {
+inline const PrimitivePtr kPrimUserDefined = std::make_shared<Primitive>("UserDefined");
+}
+
 namespace opt {
 class Graph;
 using GraphPtr = std::shared_ptr<Graph>;
diff --git a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
index 36a5271cfcd..7d62e65d10c 100644
--- a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
@@ -104,7 +104,12 @@ bool SelectAkgKernel(const CNodePtr &kernel_node, const std::shared_ptr<KernelBu
   MS_EXCEPTION_IF_NULL(kernel_node);
   MS_EXCEPTION_IF_NULL(selected_kernel_info);
   std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
-
+  auto func_call = kernel_node->input(0);
+  if (auto pre = GetCNodePrimitive(kernel_node)) {
+    if (pre->GetAttr("akg")) {
+      return true;
+    }
+  }
   if (AnfAlgo::IsNodeInGraphKernel(kernel_node)) {
     // The op_info in OpLib is only used for basic ops,
     // we don't care it in GraphKernel.
diff --git a/tests/st/ops/graph_kernel/test_user_define.py b/tests/st/ops/graph_kernel/test_user_define.py
new file mode 100644
index 00000000000..b02c2349462
--- /dev/null
+++ b/tests/st/ops/graph_kernel/test_user_define.py
@@ -0,0 +1,135 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import inspect
+import numpy as np
+import pytest
+from mindspore import context, ops, Tensor
+from mindspore.common import dtype as mstype
+from mindspore.nn import Cell
+
+
+class UserDefined(ops.PrimitiveWithInfer):
+    def __init__(self, func, shape, dtype, func_type=None):
+        ops.PrimitiveWithInfer.__init__(self, "UserDefined")
+        self.add_prim_attr('akg', True)
+
+        if "__wrapped__" in func.__dict__:
+            func = func.__dict__["__wrapped__"]
+        func_name = func.__name__
+        self.add_prim_attr('func_name', func_name)
+        func_source_str = inspect.getsource(func)
+
+        if func_type is None:
+            if "ir_builder" in func_source_str:
+                func_type = "ir_builder"
+            elif "compute" in func_source_str:
+                func_type = "tvm_compute"
+            else:
+                func_type = "hybrid"
+
+        self.add_prim_attr('func_source_str', func_source_str)
+        self.add_prim_attr('func_type', func_type)
+
+        self._shape = shape
+        self._dtype = dtype
+
+    def infer_shape(self, *args):
+        if callable(self._shape):
+            return self._shape(*args)
+        return self._shape
+
+    def infer_dtype(self, *args):
+        if callable(self._dtype):
+            return self._dtype(*args)
+        return self._dtype
+
+
+def outer_product(a, b):
+    c = output_tensor((a.shape[0], b.shape[1]), 'float32')
+
+    for i0 in range(a.shape[0]):
+        for i1 in range(b.shape[1]):
+            c[i0, i1] = 0.0
+            for i2 in range(a.shape[1]):
+                c[i0, i1] = c[i0, i1] + (a[i0, i2] * b[i2, i1])
+    return c
+
+
+class TestHybrid(Cell):
+    def __init__(self):
+        super(TestHybrid, self).__init__()
+
+        def infer_func(x, y):
+            return x
+
+        self.program = UserDefined(
+            outer_product, shape=infer_func, dtype=infer_func)
+
+    def construct(self, x, y):
+        return self.program(x, y)
+
+
+def v_add(inputs, attrs):
+    def vadd_func(dst, data_1, data_2):
+        ib = tvm.ir_builder.create()
+        with ib.for_range_n(data_1.shape, "i") as i:
+            ib.store(dst, i, ib.load(data_1, i) + ib.load(data_2, i))
+        return ib.get()
+    data_1, data_2 = inputs[0], inputs[1]
+    return tvm.extern(data_1.shape, [data_1, data_2],
+                      lambda ins, outs: vadd_func(outs[0], ins[0], ins[1]),
+                      name="v_add", dtype=data_1.dtype)
+
+
+class TestIRbuilder(Cell):
+    def __init__(self, shape):
+        super(TestIRbuilder, self).__init__()
+        self.program = UserDefined(
+            v_add, shape=shape, dtype=mstype.float16)
+
+    def construct(self, x, y):
+        return self.program(x, y)
+
+
+def test_user_defined_hybrid():
+
+    input_x = np.random.normal(0, 1, [4, 4]).astype(np.float32)
+    input_y = np.random.normal(0, 1, [4, 4]).astype(np.float32)
+
+    test = TestHybrid()
+    output = test(Tensor(input_x), Tensor(input_y))
+    expect = np.matmul(input_x, input_y)
+    assert np.allclose(expect, output.asnumpy(), 0.001, 0.001)
+
+
+def test_user_defined_irbuider():
+
+    shape = (4, 5)
+    input_x = np.random.normal(0, 1, shape).astype(np.float16)
+    input_y = np.random.normal(0, 1, shape).astype(np.float16)
+
+    test = TestIRbuilder(shape)
+    output = test(Tensor(input_x), Tensor(input_y))
+    assert np.allclose(input_x + input_y, output.asnumpy(), 0.001, 0.001)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_user_defined_gpu():
+    context.set_context(mode=0, enable_graph_kernel=True)
+    test_user_defined_hybrid()
+    test_user_defined_irbuider()