add ms_hybrid cpu fp16 support

format file

update test case

fix test case on d

update test cases on D

update akg commit id

update test cases
Zichun Ye 2022-04-28 17:40:08 +08:00
parent d98d859756
commit d5322e6f5f
5 changed files with 36 additions and 16 deletions

akg

@@ -1 +1 @@
-Subproject commit 0ddab6d9cad4c4c9faf72585e5a52faf79f55e4d
+Subproject commit f8cc452a915b78400328b7cb21be21d4ca5b3590


@@ -700,8 +700,11 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
     MS_LOG(ERROR) << "Op[" << anf_node->fullname_with_scope() << "] create single kernel json failed.";
     return false;
   }
-  (*kernel_json)[kJsonKeyProcess] = GetProcessorByTarget();
+  auto process_target = GetProcessorByTarget();
+  (*kernel_json)[kJsonKeyProcess] = process_target;
+  if (process_target == "cpu") {
+    (*kernel_json)[kJsonKeyTargetOption] = kCPUTargetOption;
+  }
   size_t hash_id = std::hash<std::string>()(kernel_json->dump());
   kernel_name_ = op_name + "_";
   (void)kernel_name_.append(std::to_string(hash_id));
@@ -776,7 +779,11 @@ bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf
   // Add parallel fusion information.
   GenParallelJson(anf_nodes, input_list, output_list, node_json_map, kernel_json);
 
-  (*kernel_json)[kJsonKeyProcess] = GetProcessorByTarget();
+  auto process_target = GetProcessorByTarget();
+  (*kernel_json)[kJsonKeyProcess] = process_target;
+  if (process_target == "cpu") {
+    (*kernel_json)[kJsonKeyTargetOption] = kCPUTargetOption;
+  }
   size_t hash_id = std::hash<std::string>()(kernel_json->dump());
   kernel_name_ = "Fused_";
   auto fg = anf_nodes[0]->func_graph();
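
Note: the two hunks above land the same change in the single-kernel (CollectJson) and fused-kernel (CollectFusedJson) paths. A hedged sketch of the resulting kernel JSON for a CPU kernel, with the key strings assumed from the constant-naming pattern (real output also carries the op name, inputs, and outputs):

kernel_json = {
    "process": "cpu",                                # value of GetProcessorByTarget()
    "target_option": "-mcpu=core-avx2 -mattr=avx2",  # kCPUTargetOption, set only when process is "cpu"
    # ... rest of the kernel description
}

Since hash_id is computed over kernel_json->dump() after these keys are set, kernels compiled with different target options get distinct kernel names.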


@@ -64,6 +64,10 @@ constexpr auto kJsonKeyBufferStitch = "buffer_stitch";
 constexpr auto kJsonKeyStitchOp = "stitch_op";
 constexpr auto kJsonKeyStitchAtomicOp = "stitch_atomic_op";
 constexpr auto kJsonKeyComputeCapability = "compute_capability";
+constexpr auto kJsonKeyTargetOption = "target_option";
+
+// target related compile options
+constexpr auto kCPUTargetOption = "-mcpu=core-avx2 -mattr=avx2";
 
 // dump option
 struct DumpOption {
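
Note: -mcpu=core-avx2 and -mattr=avx2 are LLVM-style codegen flags that select a Haswell-class CPU model and enable the AVX2 feature set, presumably what the fp16-on-CPU path in the commit title needs for vectorized code. A sketch of a hypothetical consumer of the two keys, assuming the key strings are "process" and "target_option":

import json

def cpu_compile_options(kernel_json_str):
    # Hypothetical helper, not repo code: only CPU kernels carry a
    # target_option after this change, so default to "" elsewhere.
    kernel = json.loads(kernel_json_str)
    return kernel.get("target_option", "") if kernel.get("process") == "cpu" else ""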


@@ -159,7 +159,7 @@ INTRIN_GLOBALS = {
     **INTRIN_BINARY_OP,
 }
 
-INTRIN_GPU_UNARY_OP = {
+INTRIN_GENERAL_UNARY_OP = {
     'rsqrt': _rsqrt,
     'erf': _erf,
     'isnan': numpy.isnan,
@@ -180,18 +180,20 @@ INTRIN_GPU_UNARY_OP = {
     'round': numpy.round,
 }
 
 
-INTRIN_GPU_BINARY_OP = {
+INTRIN_CPU_NOT_SUPPORT = ["atan2", "expm1"]
+
+INTRIN_GENERAL_BINARY_OP = {
     'ceil_div': lambda a, b: (a + b - 1) // b,
 }
 
 
-INTRIN_GPU = {
-    **INTRIN_GPU_UNARY_OP,
-    **INTRIN_GPU_BINARY_OP
+INTRIN_GENERAL = {
+    **INTRIN_GENERAL_UNARY_OP,
+    **INTRIN_GENERAL_BINARY_OP
 }
 
 INTRIN_RUNTIME = {
     **INTRIN_GLOBALS,
-    **INTRIN_GPU
+    **INTRIN_GENERAL
 }
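
Note: the rename turns the former GPU-only tables into general ones. INTRIN_GENERAL is, by construction, the union of the general unary and binary maps, and it folds into INTRIN_RUNTIME alongside the globals; an illustrative check, assuming the definitions above are in scope:

assert set(INTRIN_GENERAL) == set(INTRIN_GENERAL_UNARY_OP) | set(INTRIN_GENERAL_BINARY_OP)
assert set(INTRIN_GENERAL) <= set(INTRIN_RUNTIME)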
@@ -283,16 +285,17 @@ class VariableUsage(ast.NodeVisitor):
             raise ValueError(
                 "In the function {} written in the Hybrid DSL, function call id {} "
                 "not in intrinsics' list".format(self.func_name, func_id))
-        if self.device != "GPU" and func_id in list(INTRIN_GPU.keys()):
+        if (self.device == "Ascend" and func_id in list(INTRIN_GENERAL.keys())) or \
+                (self.device == "CPU" and func_id in INTRIN_CPU_NOT_SUPPORT):
             raise ValueError(
                 "In the function {} written in the Hybrid DSL, function {} is not available on the "
                 "device {}".format(self.func_name, func_id, self.device))
-        if func_id in list(INTRIN_UNARY_OP.keys()) + list(INTRIN_GPU_UNARY_OP.keys()) + list(INTRIN_LOOP.keys()) \
+        if func_id in list(INTRIN_UNARY_OP.keys()) + list(INTRIN_GENERAL_UNARY_OP.keys()) + list(INTRIN_LOOP.keys()) \
                 and len(node.args) != 1:
             raise TypeError(
                 "In the function {} written in the Hybrid DSL, function {} "
                 "expects one input, but get {}".format(self.func_name, func_id, len(node.args)))
-        if func_id in list(INTRIN_BINARY_OP.keys()) + list(INTRIN_GPU_BINARY_OP.keys()) + \
+        if func_id in list(INTRIN_BINARY_OP.keys()) + list(INTRIN_GENERAL_BINARY_OP.keys()) + \
                 list(INTRIN_BUFFER.keys()) and len(node.args) != 2:
             raise TypeError(
                 "In the function {} written in the Hybrid DSL, function {} "


@@ -48,7 +48,7 @@ def allocate_and_math_intrin_example(a, b):
     for i0 in range(a.shape[0]):
         for i1 in range(b.shape[1]):
-            d[i0, i1] = exp(a[i0, i1])
+            d[i0, i1] = abs(a[i0, i1])
             c[i0, i1] = d[i0, i1] + b[i0, i1]
     return c
@@ -259,7 +259,10 @@ def test_ms_hybrid_cpu_graph_mode():
         pass
     else:
         context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-        ms_hybrid_allocate_cpu()
+        ms_hybrid_cast_with_infer()
+        ms_hybrid_cast_without_infer()
+        ms_hybrid_allocate()
+        ms_hybrid_grid()
 
 
 @pytest.mark.level0
@@ -276,4 +279,7 @@ def test_ms_hybrid_cpu_pynative_mode():
         pass
     else:
         context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
-        ms_hybrid_allocate_cpu()
+        ms_hybrid_cast_with_infer()
+        ms_hybrid_cast_without_infer()
+        ms_hybrid_allocate()
+        ms_hybrid_grid()
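
Note: end to end, the change lets hybrid-DSL kernels run on CPU, including float16 inputs per the commit title. An illustrative usage sketch in the style of the tests above; it assumes the 1.7-era ms_hybrid decorator and the ops.Custom hybrid path rather than quoting code from this repo:

import numpy as np
import mindspore.ops as ops
from mindspore import Tensor, context
from mindspore.ops import ms_hybrid  # decorator of this era; renamed in later releases

@ms_hybrid
def abs_add(a, b):
    # output_tensor and abs are hybrid-DSL builtins resolved by the compiler.
    c = output_tensor(a.shape, a.dtype)
    for i0 in range(a.shape[0]):
        for i1 in range(b.shape[1]):
            c[i0, i1] = abs(a[i0, i1]) + b[i0, i1]
    return c

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
op = ops.Custom(abs_add, func_type="hybrid")  # shape/dtype inferred from the DSL
a = Tensor(np.random.rand(4, 4).astype(np.float16))
b = Tensor(np.random.rand(4, 4).astype(np.float16))
print(op(a, b))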