add ms_hybrid cpu fp16 support

format file

update test case

fix test case on d

update test cases on D

update akg commit id

update test cases
Zichun Ye 2022-04-28 17:40:08 +08:00
parent d98d859756
commit d5322e6f5f
5 changed files with 36 additions and 16 deletions

akg

@@ -1 +1 @@
-Subproject commit 0ddab6d9cad4c4c9faf72585e5a52faf79f55e4d
+Subproject commit f8cc452a915b78400328b7cb21be21d4ca5b3590


@@ -700,8 +700,11 @@ bool AkgKernelJsonGenerator::CollectJson(const AnfNodePtr &anf_node, nlohmann::j
     MS_LOG(ERROR) << "Op[" << anf_node->fullname_with_scope() << "] create single kernel json failed.";
     return false;
   }
-  (*kernel_json)[kJsonKeyProcess] = GetProcessorByTarget();
+  auto process_target = GetProcessorByTarget();
+  (*kernel_json)[kJsonKeyProcess] = process_target;
+  if (process_target == "cpu") {
+    (*kernel_json)[kJsonKeyTargetOption] = kCPUTargetOption;
+  }
   size_t hash_id = std::hash<std::string>()(kernel_json->dump());
   kernel_name_ = op_name + "_";
   (void)kernel_name_.append(std::to_string(hash_id));
@@ -776,7 +779,11 @@ bool AkgKernelJsonGenerator::CollectFusedJson(const std::vector<AnfNodePtr> &anf
   // Add parallel fusion information.
   GenParallelJson(anf_nodes, input_list, output_list, node_json_map, kernel_json);
 
-  (*kernel_json)[kJsonKeyProcess] = GetProcessorByTarget();
+  auto process_target = GetProcessorByTarget();
+  (*kernel_json)[kJsonKeyProcess] = process_target;
+  if (process_target == "cpu") {
+    (*kernel_json)[kJsonKeyTargetOption] = kCPUTargetOption;
+  }
   size_t hash_id = std::hash<std::string>()(kernel_json->dump());
   kernel_name_ = "Fused_";
   auto fg = anf_nodes[0]->func_graph();
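
Note: the two hunks above land the same change in the single-kernel (CollectJson) and fused-kernel (CollectFusedJson) paths. A hedged sketch of the resulting kernel JSON for a CPU kernel, with the key strings assumed from the constant-naming pattern (real output also carries the op name, inputs, and outputs):

kernel_json = {
    "process": "cpu",                                # value of GetProcessorByTarget()
    "target_option": "-mcpu=core-avx2 -mattr=avx2",  # kCPUTargetOption, set only when process is "cpu"
    # ... rest of the kernel description
}

Since hash_id is computed over kernel_json->dump() after these keys are set, kernels compiled with different target options get distinct kernel names.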


@@ -64,6 +64,10 @@ constexpr auto kJsonKeyBufferStitch = "buffer_stitch";
 constexpr auto kJsonKeyStitchOp = "stitch_op";
 constexpr auto kJsonKeyStitchAtomicOp = "stitch_atomic_op";
 constexpr auto kJsonKeyComputeCapability = "compute_capability";
+constexpr auto kJsonKeyTargetOption = "target_option";
+
+// target related compile options
+constexpr auto kCPUTargetOption = "-mcpu=core-avx2 -mattr=avx2";
 
 // dump option
 struct DumpOption {
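
Note: -mcpu=core-avx2 and -mattr=avx2 are LLVM-style codegen flags that select a Haswell-class CPU model and enable the AVX2 feature set, presumably what the fp16-on-CPU path in the commit title needs for vectorized code. A sketch of a hypothetical consumer of the two keys, assuming the key strings are "process" and "target_option":

import json

def cpu_compile_options(kernel_json_str):
    # Hypothetical helper, not repo code: only CPU kernels carry a
    # target_option after this change, so default to "" elsewhere.
    kernel = json.loads(kernel_json_str)
    return kernel.get("target_option", "") if kernel.get("process") == "cpu" else ""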


@@ -159,7 +159,7 @@ INTRIN_GLOBALS = {
     **INTRIN_BINARY_OP,
 }
 
-INTRIN_GPU_UNARY_OP = {
+INTRIN_GENERAL_UNARY_OP = {
     'rsqrt': _rsqrt,
     'erf': _erf,
     'isnan': numpy.isnan,
@@ -180,18 +180,20 @@ INTRIN_GPU_UNARY_OP = {
     'round': numpy.round,
 }
 
 
-INTRIN_GPU_BINARY_OP = {
+INTRIN_CPU_NOT_SUPPORT = ["atan2", "expm1"]
+
+INTRIN_GENERAL_BINARY_OP = {
     'ceil_div': lambda a, b: (a + b - 1) // b,
 }
 
 
-INTRIN_GPU = {
-    **INTRIN_GPU_UNARY_OP,
-    **INTRIN_GPU_BINARY_OP
+INTRIN_GENERAL = {
+    **INTRIN_GENERAL_UNARY_OP,
+    **INTRIN_GENERAL_BINARY_OP
 }
 
 INTRIN_RUNTIME = {
     **INTRIN_GLOBALS,
-    **INTRIN_GPU
+    **INTRIN_GENERAL
 }
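
Note: the rename turns the former GPU-only tables into general ones. INTRIN_GENERAL is, by construction, the union of the general unary and binary maps, and it folds into INTRIN_RUNTIME alongside the globals; an illustrative check, assuming the definitions above are in scope:

assert set(INTRIN_GENERAL) == set(INTRIN_GENERAL_UNARY_OP) | set(INTRIN_GENERAL_BINARY_OP)
assert set(INTRIN_GENERAL) <= set(INTRIN_RUNTIME)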
@@ -283,16 +285,17 @@ class VariableUsage(ast.NodeVisitor):
             raise ValueError(
                 "In the function {} written in the Hybrid DSL, function call id {} "
                 "not in intrinsics' list".format(self.func_name, func_id))
-        if self.device != "GPU" and func_id in list(INTRIN_GPU.keys()):
+        if (self.device == "Ascend" and func_id in list(INTRIN_GENERAL.keys())) or \
+                (self.device == "CPU" and func_id in INTRIN_CPU_NOT_SUPPORT):
             raise ValueError(
                 "In the function {} written in the Hybrid DSL, function {} is not available on the "
                 "device {}".format(self.func_name, func_id, self.device))
-        if func_id in list(INTRIN_UNARY_OP.keys()) + list(INTRIN_GPU_UNARY_OP.keys()) + list(INTRIN_LOOP.keys()) \
+        if func_id in list(INTRIN_UNARY_OP.keys()) + list(INTRIN_GENERAL_UNARY_OP.keys()) + list(INTRIN_LOOP.keys()) \
                 and len(node.args) != 1:
             raise TypeError(
                 "In the function {} written in the Hybrid DSL, function {} "
                 "expects one input, but get {}".format(self.func_name, func_id, len(node.args)))
-        if func_id in list(INTRIN_BINARY_OP.keys()) + list(INTRIN_GPU_BINARY_OP.keys()) + \
+        if func_id in list(INTRIN_BINARY_OP.keys()) + list(INTRIN_GENERAL_BINARY_OP.keys()) + \
                 list(INTRIN_BUFFER.keys()) and len(node.args) != 2:
             raise TypeError(
                 "In the function {} written in the Hybrid DSL, function {} "


@@ -48,7 +48,7 @@ def allocate_and_math_intrin_example(a, b):
     for i0 in range(a.shape[0]):
         for i1 in range(b.shape[1]):
-            d[i0, i1] = exp(a[i0, i1])
+            d[i0, i1] = abs(a[i0, i1])
             c[i0, i1] = d[i0, i1] + b[i0, i1]
     return c
@@ -259,7 +259,10 @@ def test_ms_hybrid_cpu_graph_mode():
         pass
     else:
         context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-        ms_hybrid_allocate_cpu()
+        ms_hybrid_cast_with_infer()
+        ms_hybrid_cast_without_infer()
+        ms_hybrid_allocate()
+        ms_hybrid_grid()
 
 
 @pytest.mark.level0
@@ -276,4 +279,7 @@ def test_ms_hybrid_cpu_pynative_mode():
         pass
     else:
         context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
-        ms_hybrid_allocate_cpu()
+        ms_hybrid_cast_with_infer()
+        ms_hybrid_cast_without_infer()
+        ms_hybrid_allocate()
+        ms_hybrid_grid()
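
Note: end to end, the change lets hybrid-DSL kernels run on CPU, including float16 inputs per the commit title. An illustrative usage sketch in the style of the tests above; it assumes the 1.7-era ms_hybrid decorator and the ops.Custom hybrid path rather than quoting code from this repo:

import numpy as np
import mindspore.ops as ops
from mindspore import Tensor, context
from mindspore.ops import ms_hybrid  # decorator of this era; renamed in later releases

@ms_hybrid
def abs_add(a, b):
    # output_tensor and abs are hybrid-DSL builtins resolved by the compiler.
    c = output_tensor(a.shape, a.dtype)
    for i0 in range(a.shape[0]):
        for i1 in range(b.shape[1]):
            c[i0, i1] = abs(a[i0, i1]) + b[i0, i1]
    return c

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
op = ops.Custom(abs_add, func_type="hybrid")  # shape/dtype inferred from the DSL
a = Tensor(np.random.rand(4, 4).astype(np.float16))
b = Tensor(np.random.rand(4, 4).astype(np.float16))
print(op(a, b))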