forked from mindspore-Ecosystem/mindspore

add ms_hybrid cpu fp16 support

format file; fix and update test cases on D (Ascend); update akg commit id
parent d98d859756
commit d5322e6f5f
akg (submodule)
@@ -1 +1 @@
-Subproject commit 0ddab6d9cad4c4c9faf72585e5a52faf79f55e4d
+Subproject commit f8cc452a915b78400328b7cb21be21d4ca5b3590
@@ -700,8 +700,11 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
     MS_LOG(ERROR) << "Op[" << anf_node->fullname_with_scope() << "] create single kernel json failed.";
     return false;
   }

-  (*kernel_json)[kJsonKeyProcess] = GetProcessorByTarget();
+  auto process_target = GetProcessorByTarget();
+  (*kernel_json)[kJsonKeyProcess] = process_target;
+  if (process_target == "cpu") {
+    (*kernel_json)[kJsonKeyTargetOption] = kCPUTargetOption;
+  }
   size_t hash_id = std::hash<std::string>()(kernel_json->dump());
   kernel_name_ = op_name + "_";
   (void)kernel_name_.append(std::to_string(hash_id));
@@ -776,7 +779,11 @@ bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf
   // Add parallel fusion information.
   GenParallelJson(anf_nodes, input_list, output_list, node_json_map, kernel_json);

-  (*kernel_json)[kJsonKeyProcess] = GetProcessorByTarget();
+  auto process_target = GetProcessorByTarget();
+  (*kernel_json)[kJsonKeyProcess] = process_target;
+  if (process_target == "cpu") {
+    (*kernel_json)[kJsonKeyTargetOption] = kCPUTargetOption;
+  }
   size_t hash_id = std::hash<std::string>()(kernel_json->dump());
   kernel_name_ = "Fused_";
   auto fg = anf_nodes[0]->func_graph();
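Both CollectJson and CollectFusedJson now record the processor in the kernel JSON and, when the target is "cpu", attach the CPU compile option under target_option; the kernel name stays the op name (or the "Fused_" prefix) plus a hash of the dumped JSON. A minimal Python sketch of that naming scheme, assuming an illustrative JSON body and op name (not taken from a real kernel):

import hashlib
import json

# Key strings mirror kJsonKeyProcess / kJsonKeyTargetOption and kCPUTargetOption from this diff.
CPU_TARGET_OPTION = "-mcpu=core-avx2 -mattr=avx2"

def build_kernel_json(op_name, process_target, body):
    # Attach "process" and, for CPU kernels only, "target_option".
    kernel_json = dict(body)
    kernel_json["process"] = process_target
    if process_target == "cpu":
        kernel_json["target_option"] = CPU_TARGET_OPTION
    # Kernel name = op name + "_" + hash of the dumped JSON, analogous to
    # std::hash<std::string> over kernel_json->dump() in the C++ code.
    dumped = json.dumps(kernel_json, sort_keys=True)
    hash_id = int(hashlib.sha256(dumped.encode()).hexdigest()[:16], 16)
    return kernel_json, "{}_{}".format(op_name, hash_id)

kernel_json, kernel_name = build_kernel_json("Abs", "cpu", {"op_desc": []})  # body content is hypothetical
print(kernel_name)
print(kernel_json["target_option"])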
@@ -64,6 +64,10 @@ constexpr auto kJsonKeyBufferStitch = "buffer_stitch";
 constexpr auto kJsonKeyStitchOp = "stitch_op";
 constexpr auto kJsonKeyStitchAtomicOp = "stitch_atomic_op";
 constexpr auto kJsonKeyComputeCapability = "compute_capability";
+constexpr auto kJsonKeyTargetOption = "target_option";
+
+// target related compile options
+constexpr auto kCPUTargetOption = "-mcpu=core-avx2 -mattr=avx2";

 // dump option
 struct DumpOption {
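The new kCPUTargetOption hard-codes core-avx2/AVX2 codegen flags rather than probing the host. Not part of the patch, but a quick way to check whether the local machine can actually run AVX2 kernels; a Linux-only sketch that reads /proc/cpuinfo (path and flag name are the usual ones, adjust for other platforms):

def host_supports_avx2(cpuinfo_path="/proc/cpuinfo"):
    # Return True if any CPU "flags" line advertises avx2; False if unreadable (e.g. non-Linux).
    try:
        with open(cpuinfo_path) as f:
            return any(line.startswith("flags") and "avx2" in line for line in f)
    except OSError:
        return False

print(host_supports_avx2())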
@@ -159,7 +159,7 @@ INTRIN_GLOBALS = {
     **INTRIN_BINARY_OP,
 }

-INTRIN_GPU_UNARY_OP = {
+INTRIN_GENERAL_UNARY_OP = {
     'rsqrt': _rsqrt,
     'erf': _erf,
     'isnan': numpy.isnan,
@@ -180,18 +180,20 @@ INTRIN_GPU_UNARY_OP = {
     'round': numpy.round,
 }

-INTRIN_GPU_BINARY_OP = {
+INTRIN_CPU_NOT_SUPPORT = ["atan2", "expm1"]
+
+INTRIN_GENERAL_BINARY_OP = {
     'ceil_div': lambda a, b: (a + b - 1) // b,
 }

-INTRIN_GPU = {
-    **INTRIN_GPU_UNARY_OP,
-    **INTRIN_GPU_BINARY_OP
+INTRIN_GENERAL = {
+    **INTRIN_GENERAL_UNARY_OP,
+    **INTRIN_GENERAL_BINARY_OP
 }

 INTRIN_RUNTIME = {
     **INTRIN_GLOBALS,
-    **INTRIN_GPU
+    **INTRIN_GENERAL
 }
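The GPU-only intrinsic tables become "general" tables shared by GPU and CPU, with a small CPU exclusion list. A standalone sketch of the resulting layout, assuming a tiny illustrative subset of entries and treating INTRIN_RUNTIME as the table the pure-Python execution path looks up (its exact role is not shown in this hunk):

import numpy

# Names mirror the diff; the entries are an illustrative subset, not the full tables.
INTRIN_GENERAL_UNARY_OP = {'isnan': numpy.isnan, 'round': numpy.round}
INTRIN_GENERAL_BINARY_OP = {'ceil_div': lambda a, b: (a + b - 1) // b}
INTRIN_CPU_NOT_SUPPORT = ["atan2", "expm1"]

INTRIN_GENERAL = {**INTRIN_GENERAL_UNARY_OP, **INTRIN_GENERAL_BINARY_OP}

INTRIN_GLOBALS = {}  # stands in for the unary/binary globals left unchanged by the diff
INTRIN_RUNTIME = {**INTRIN_GLOBALS, **INTRIN_GENERAL}

print(INTRIN_RUNTIME['ceil_div'](7, 2))  # 4
print(INTRIN_RUNTIME['round'](2.6))      # 3.0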
@@ -283,16 +285,17 @@ class VariableUsage(ast.NodeVisitor):
             raise ValueError(
                 "In the function {} written in the Hybrid DSL, function call id {} "
                 "not in intrinsics' list".format(self.func_name, func_id))
-        if self.device != "GPU" and func_id in list(INTRIN_GPU.keys()):
+        if (self.device == "Ascend" and func_id in list(INTRIN_GENERAL.keys())) or \
+                (self.device == "CPU" and func_id in INTRIN_CPU_NOT_SUPPORT):
             raise ValueError(
                 "In the function {} written in the Hybrid DSL, function {} is not available on the "
                 "device {}".format(self.func_name, func_id, self.device))
-        if func_id in list(INTRIN_UNARY_OP.keys()) + list(INTRIN_GPU_UNARY_OP.keys()) + list(INTRIN_LOOP.keys()) \
+        if func_id in list(INTRIN_UNARY_OP.keys()) + list(INTRIN_GENERAL_UNARY_OP.keys()) + list(INTRIN_LOOP.keys()) \
                 and len(node.args) != 1:
             raise TypeError(
                 "In the function {} written in the Hybrid DSL, function {} "
                 "expects one input, but get {}".format(self.func_name, func_id, len(node.args)))
-        if func_id in list(INTRIN_BINARY_OP.keys()) + list(INTRIN_GPU_BINARY_OP.keys()) + \
+        if func_id in list(INTRIN_BINARY_OP.keys()) + list(INTRIN_GENERAL_BINARY_OP.keys()) + \
                 list(INTRIN_BUFFER.keys()) and len(node.args) != 2:
             raise TypeError(
                 "In the function {} written in the Hybrid DSL, function {} "
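The device gate changes from "anything that is not GPU rejects GPU intrinsics" to two explicit cases: Ascend rejects every general intrinsic, and CPU rejects only the ones in INTRIN_CPU_NOT_SUPPORT. A minimal sketch of that check pulled out of the visitor (the table contents are illustrative; in the real code this runs inside VariableUsage while walking the DSL function's AST):

def check_device(func_name, func_id, device, intrin_general, intrin_cpu_not_support):
    # Ascend supports none of the "general" intrinsics; CPU supports all of them
    # except the explicitly excluded ones.
    if (device == "Ascend" and func_id in intrin_general) or \
            (device == "CPU" and func_id in intrin_cpu_not_support):
        raise ValueError(
            "In the function {} written in the Hybrid DSL, function {} is not available on the "
            "device {}".format(func_name, func_id, device))

general = {"isnan", "erf", "ceil_div", "atan2"}
not_on_cpu = ["atan2", "expm1"]
check_device("my_kernel", "isnan", "CPU", general, not_on_cpu)      # accepted
check_device("my_kernel", "ceil_div", "GPU", general, not_on_cpu)   # accepted
try:
    check_device("my_kernel", "atan2", "CPU", general, not_on_cpu)  # rejected
except ValueError as err:
    print(err)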
@@ -48,7 +48,7 @@ def allocate_and_math_intrin_example(a, b):

     for i0 in range(a.shape[0]):
         for i1 in range(b.shape[1]):
-            d[i0, i1] = exp(a[i0, i1])
+            d[i0, i1] = abs(a[i0, i1])
             c[i0, i1] = d[i0, i1] + b[i0, i1]
     return c

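With exp swapped for abs, the expected result of this example kernel is simply |a| + b. A plain-numpy reference for what the loop body computes after the change (the fp16 inputs are illustrative; d plays the role of the allocated scratch buffer):

import numpy

def allocate_and_math_intrin_reference(a, b):
    # Mirrors the DSL loop body: d is the allocated scratch buffer, abs is the math intrinsic.
    d = numpy.empty_like(a)
    c = numpy.empty_like(a)
    for i0 in range(a.shape[0]):
        for i1 in range(b.shape[1]):
            d[i0, i1] = abs(a[i0, i1])
            c[i0, i1] = d[i0, i1] + b[i0, i1]
    return c

a = numpy.array([[-1.0, 2.0]], dtype=numpy.float16)
b = numpy.array([[0.5, 0.5]], dtype=numpy.float16)
print(allocate_and_math_intrin_reference(a, b))  # [[1.5 2.5]]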
@@ -259,7 +259,10 @@ def test_ms_hybrid_cpu_graph_mode():
         pass
     else:
         context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
         ms_hybrid_allocate_cpu()
         ms_hybrid_cast_with_infer()
         ms_hybrid_cast_without_infer()
         ms_hybrid_allocate()
         ms_hybrid_grid()


 @pytest.mark.level0
@@ -276,4 +279,7 @@ def test_ms_hybrid_cpu_pynative_mode():
         pass
     else:
         context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
         ms_hybrid_allocate_cpu()
         ms_hybrid_cast_with_infer()
         ms_hybrid_cast_without_infer()
         ms_hybrid_allocate()
         ms_hybrid_grid()
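Both tests select the CPU backend via context.set_context and then run the same kernel list in graph mode and pynative mode. For orientation, a hedged sketch of how such a CPU hybrid test typically drives a kernel: it assumes the ms_hybrid decorator is exposed as mindspore.ops.ms_hybrid and that ops.Custom accepts func_type="hybrid" in this revision (taken from the style of the test file this hunk touches, not from this diff), so treat the API names as assumptions:

import numpy as np
from mindspore import context, ops, Tensor
from mindspore.ops import ms_hybrid  # assumed import path for this revision

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

@ms_hybrid
def add_fp16(a, b):
    # Hybrid DSL body: output_tensor is a DSL builtin; the source is parsed, not executed as plain Python.
    c = output_tensor(a.shape, a.dtype)
    for i0 in range(a.shape[0]):
        for i1 in range(a.shape[1]):
            c[i0, i1] = a[i0, i1] + b[i0, i1]
    return c

custom_add = ops.Custom(add_fp16, func_type="hybrid")  # func_type assumed available here
x = Tensor(np.ones((4, 4)).astype(np.float16))
y = Tensor(np.ones((4, 4)).astype(np.float16))
print(custom_add(x, y))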