forked from mindspore-Ecosystem/mindspore
!14304 [GraphKernel] Dump GraphKernel split info as text; dump akg kernel launch fail message
From: @dayschan Reviewed-by: @gaoxiong1,@gaoxiong1,@anyrenwei Signed-off-by: @anyrenwei
This commit is contained in:
commit
75fdaaa6aa
|
@ -14,33 +14,57 @@
|
|||
# ============================================================================
|
||||
"""GraphKernel splitter"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import json.decoder as jd
|
||||
import traceback
|
||||
from mindspore import log as logger
|
||||
from . import model
|
||||
from . import utils
|
||||
|
||||
|
||||
def reset_graphmode_for_inplaceassign(graph_list, graph_mode):
    """Force the mode of every sub-graph containing an InplaceAssign op to 'composite'.

    Args:
        graph_list: sequence of dumped sub-graphs; each one is indexed with 'op_desc',
            whose items are indexed with 'name'.
        graph_mode: mutable sequence parallel to `graph_list`; updated in place.
    """
    for idx, graph in enumerate(graph_list):
        op_names = [op['name'] for op in graph['op_desc']]
        if 'InplaceAssign' in op_names:
            graph_mode[idx] = 'composite'
|
||||
|
||||
|
||||
def split_with_json(json_str, flags_str):
    """Call costmodel to split GraphKernel

    Args:
        json_str: JSON text describing the composite graph; its 'process' entry is
            passed to the cost model as the split target.
        flags_str: JSON text holding the split flags (e.g. "dump_as_text").

    Returns:
        A JSON string with the keys "multi_graph", "graph_desc" and "graph_mode",
        or None when either input cannot be decoded as JSON (the traceback is logged).
    """
    try:
        graph_desc = json.loads(json_str)
        flags = json.loads(flags_str)
        target = graph_desc['process']
        comp = model.load_composite(graph_desc)
        graph_split, graph_mode = model.split(comp.graph, target)
        is_multi_graph = len(graph_split) > 1
        graph_list = list(map(comp.dump, graph_split))
        # Sub-graphs holding an InplaceAssign op are always treated as composite.
        _reset_graphmode_for_inplaceassign(graph_list, graph_mode)
        result = {"multi_graph": is_multi_graph,
                  "graph_desc": graph_list,
                  "graph_mode": graph_mode}
        # Writes the text dump only when the "dump_as_text" flag is set.
        _dump_split_info(flags, json_str, comp.graph, graph_split, graph_mode)
        return json.dumps(result)
    except jd.JSONDecodeError:
        logger.error(traceback.format_exc())
        return None
|
||||
|
||||
|
||||
def _reset_graphmode_for_inplaceassign(graph_list, graph_mode):
|
||||
"""Operator with InplaceAssign should always be composite op"""
|
||||
for i, g in enumerate(graph_list):
|
||||
if any([op['name'] == 'InplaceAssign' for op in g['op_desc']]):
|
||||
graph_mode[i] = 'composite'
|
||||
|
||||
|
||||
def _dump_split_info(flags, graph_json, graph_desc, subgraphs, graph_mode):
|
||||
"""Dump split info as text"""
|
||||
if not flags.get("dump_as_text", False):
|
||||
return
|
||||
utils.create_dir(utils.GRAPH_KERNEL_DUMP_PATH)
|
||||
filename = os.path.join(utils.GRAPH_KERNEL_DUMP_PATH, "graph_kernel_split_mode.txt")
|
||||
with open(filename, "a+") as f:
|
||||
f.write("********** main graph: {} **********\n".format(graph_desc.name))
|
||||
f.write("input json:\n{}\n".format(graph_json))
|
||||
f.write("graph desc:\n{}\n".format(str(graph_desc)))
|
||||
if len(subgraphs) > 1:
|
||||
for i, g in enumerate(subgraphs):
|
||||
f.write("-------- subgraph {}, mode: {} --------\n".format(i, graph_mode[i]))
|
||||
f.write("{}\n".format(str(g)))
|
||||
else:
|
||||
f.write("Graph unchanged.\n")
|
||||
f.write("\n")
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""GraphKernel utils"""
|
||||
import os
|
||||
|
||||
GRAPH_KERNEL_DUMP_PATH = "graph_kernel_dump"
|
||||
|
||||
|
||||
def create_dir(pathname):
    """Try to create directory

    Missing parent directories are created as well, and an already existing
    directory is not an error.

    Args:
        pathname: path of the directory to create.

    Creation is best-effort: any OSError (e.g. permission denied, or the path exists
    as a regular file) is ignored, so callers see the failure when they use the path.
    """
    try:
        os.makedirs(pathname, exist_ok=True)
    except OSError:
        # Deliberately best-effort; see docstring.
        pass
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -107,7 +107,9 @@ bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vect
|
|||
thread_info[5], 0, reinterpret_cast<CUstream>(stream_ptr),
|
||||
reinterpret_cast<void **>(&runtimeargs[0]), 0);
|
||||
if (result != CUDA_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Launch Kernel failed.";
|
||||
const char *msg = nullptr;
|
||||
cuGetErrorName(result, &msg);
|
||||
MS_LOG(ERROR) << "Launch Kernel failed. error: " << msg;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -29,6 +29,7 @@
|
|||
#include "backend/kernel_compiler/akg/akg_kernel_json_decoder.h"
|
||||
#include "backend/optimizer/graph_kernel/graph_kernel_helper.h"
|
||||
#include "debug/anf_ir_dump.h"
|
||||
#include "utils/context/graph_kernel_flags.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
|
@ -572,17 +573,19 @@ class CostModelSplitSchemer : public Splitter::SplitSchemer {
|
|||
|
||||
// call costmodel split function.
|
||||
auto json_desc_str = json_desc.dump();
|
||||
MS_LOG(DEBUG) << "CallPyFn: [" << kGraphKernelSplitFunc << "] with input json:\n" << json_desc_str;
|
||||
auto ret = parse::python_adapter::CallPyFn(kGraphKernelModule, kGraphKernelSplitFunc, json_desc_str);
|
||||
auto flags_str = CollectSplitFlags();
|
||||
MS_LOG(DEBUG) << "CallPyFn: [" << kGraphKernelSplitFunc << "] with input json: " << json_desc_str
|
||||
<< ". flag: " << flags_str;
|
||||
auto ret = parse::python_adapter::CallPyFn(kGraphKernelModule, kGraphKernelSplitFunc, json_desc_str, flags_str);
|
||||
if (py::isinstance<py::none>(ret)) {
|
||||
MS_LOG(ERROR) << "CallPyFn: [" << kGraphKernelSplitFunc << "] return invalid result. input json:\n"
|
||||
<< json_desc_str;
|
||||
<< json_desc_str << ". flag: " << flags_str;
|
||||
return false;
|
||||
}
|
||||
std::string split_graphs_str = py::cast<std::string>(ret);
|
||||
if (split_graphs_str.empty()) {
|
||||
MS_LOG(ERROR) << "CallPyFn: [" << kGraphKernelSplitFunc << "] return invalid result. input json:\n"
|
||||
<< json_desc_str;
|
||||
<< json_desc_str << ". flag: " << flags_str;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -713,6 +716,13 @@ class CostModelSplitSchemer : public Splitter::SplitSchemer {
|
|||
}
|
||||
}
|
||||
|
||||
virtual std::string CollectSplitFlags() {
|
||||
const auto &flags = context::GraphKernelFlags::GetInstance();
|
||||
nlohmann::json flag_json;
|
||||
flag_json["dump_as_text"] = flags.dump_as_text;
|
||||
return flag_json.dump();
|
||||
}
|
||||
|
||||
std::shared_ptr<FuncGraph> func_graph_;
|
||||
AnfNodePtrList topo_all_nodes_;
|
||||
AnfNodePtrList topo_valid_nodes_;
|
||||
|
|
|
@ -49,12 +49,13 @@ class GraphKernelFlags {
|
|||
|
||||
public:
|
||||
/**
|
||||
* dump_as_text, unsupported now.
|
||||
* Dump info as human-readable text.
|
||||
* A directory "graph_kernel_dump" will be created, and all information will be dumped in this directory.
|
||||
*/
|
||||
bool dump_as_text{false};
|
||||
|
||||
/**
|
||||
* opt_level, value from 0 to 3.
|
||||
* Optimization level, value from 0 to 3.
|
||||
* 0: GraphKernel disabled
|
||||
* 1: GraphKernel enabled
|
||||
* 2 and 3 are not supported now.
|
||||
|
@ -93,17 +94,21 @@ class GraphKernelFlags {
|
|||
std::vector<std::string> disable_expand_ops;
|
||||
|
||||
/**
|
||||
* enable_cluster_ops, unsupported now.
|
||||
* Additional clustering operators (case sensitive).
|
||||
* The operators to be added into the default clustering operator list.
|
||||
*/
|
||||
std::vector<std::string> enable_cluster_ops;
|
||||
|
||||
/**
|
||||
* enable_cluster_ops_only, unsupported now.
|
||||
* Clustering operators to be enabled (case sensitive).
|
||||
* Unlike the "enable_cluster_ops", the default list will be overwritten by this list.
|
||||
* Note that the "enable_cluster_ops" and "disable_cluster_ops" will be ignored if this flag is set.
|
||||
*/
|
||||
std::vector<std::string> enable_cluster_ops_only;
|
||||
|
||||
/**
|
||||
* disable_cluster_ops, unsupported now.
|
||||
* Clustering operators to be disabled (case sensitive).
|
||||
* The behavior is undefined when this list overlaps with "enable_cluster_ops".
|
||||
*/
|
||||
std::vector<std::string> disable_cluster_ops;
|
||||
|
||||
|
|
|
@ -26,3 +26,5 @@ CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDi
|
|||
}
|
||||
|
||||
// Unit-test stub: performs no unloading and always reports success.
CUresult cuModuleUnload(CUmodule hmod) {
  (void)hmod;  // The module handle is not used by the stub.
  return CUDA_SUCCESS;
}
|
||||
|
||||
CUresult cuGetErrorName(CUresult error, const char **pStr) { return CUDA_SUCCESS; }
|
||||
|
|
|
@ -46,5 +46,5 @@ CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDi
|
|||
unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
|
||||
unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
|
||||
CUresult cuModuleUnload(CUmodule hmod);
|
||||
|
||||
CUresult cuGetErrorName(CUresult error, const char **pStr);
|
||||
#endif // TESTS_UT_STUB_RUNTIME_INCLUDE_CUDA_H_
|
||||
|
|
Loading…
Reference in New Issue