diff --git a/build.sh b/build.sh index 428743f0ffb..cfa657ff3ed 100755 --- a/build.sh +++ b/build.sh @@ -24,7 +24,7 @@ usage() { echo "Usage:" echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" - echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" + echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" echo "" echo "Options:" @@ -48,6 +48,7 @@ usage() echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -Q Enable dump memory, default off" echo " -D Enable dumping of function graph ir, default on" + echo " -S Enable async data dump, default off" echo " -z Compile dataset & mindrecord, default on" echo " -M Enable MPI and NCCL for GPU training, gpu default on" echo " -V Specify the minimum required cuda version, default CUDA 10.1" @@ -88,6 +89,7 @@ checkopts() ENABLE_TIMELINE="off" ENABLE_DUMP2PROTO="on" ENABLE_DUMPE2E="off" + ENABLE_DATA_DUMP="off" ENABLE_DUMP_IR="on" COMPILE_MINDDATA="on" ENABLE_MPI="off" @@ -102,7 +104,7 @@ checkopts() ENABLE_PYTHON="on" # Process the options - while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt + while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt do OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') case "${opt}" in @@ -218,6 +220,11 @@ checkopts() ENABLE_DUMPE2E="$OPTARG" echo "enable dump end to end" ;; + S) + check_on_off $OPTARG S + ENABLE_DATA_DUMP="$OPTARG" + echo "enable data dump" + ;; D) check_on_off $OPTARG D ENABLE_DUMP_IR="$OPTARG" @@ -321,6 +328,9 @@ build_mindspore() if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" fi + if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON" + fi CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" if [[ "X$ENABLE_MPI" = "Xon" ]]; then diff --git a/cmake/options.cmake b/cmake/options.cmake index b01c623377d..2470c25a90c 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -116,6 +116,10 @@ if(ENABLE_DUMP_E2E) add_compile_definitions(ENABLE_DUMP_E2E) endif() +if(ENABLE_DATA_DUMP) + add_compile_definitions(ENABLE_DATA_DUMP) +endif() + if(ENABLE_DEBUGGER) add_compile_definitions(ENABLE_DEBUGGER) endif() diff --git a/config/data_dump.json b/config/data_dump.json new file mode 100644 index 00000000000..fc08f785906 --- /dev/null +++ b/config/data_dump.json @@ -0,0 +1,15 @@ +{ + "DumpSettings": { + "net_name": "ResNet50", + "mode": 1, + "iteration": 0, + "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"] + }, + + "DumpSettingsSpec": { + "net_name": "net name eg:ResNet50", + "mode": "0: dump all kernels, 1: dump kernels in kernels list", + "iteration": "specified iteration ", + "kernels": "op's full scope name which need to be dump" + } +} \ No newline at end of file diff --git a/graphengine b/graphengine index 1c2672868fd..18cf690152a 160000 --- a/graphengine +++ b/graphengine @@ -1 +1 @@ -Subproject commit 1c2672868fda8b1d012c99e5aca73725ac869ba9 +Subproject commit 18cf690152add623ffbddfbbb4674d1b34484ca7 diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 8523475b1f6..58b3ce6881e 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -109,8 +109,12 @@ if (ENABLE_D) file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto") ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER}) + file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/dump/proto/*.proto") + ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP}) + list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS}) list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS}) + list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS}) add_compile_definitions(ENABLE_D) endif () diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt index ba0c5e07ac2..37ffcceeaf5 100644 --- a/mindspore/ccsrc/debug/CMakeLists.txt +++ b/mindspore/ccsrc/debug/CMakeLists.txt @@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER) ) endif (ENABLE_DEBUGGER) +if (ENABLE_D) + list(APPEND _DEBUG_SRC_LIST + "${CMAKE_CURRENT_SOURCE_DIR}/common.cc" + ) + if (ENABLE_DATA_DUMP) + list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc") + endif(ENABLE_DATA_DUMP) +endif() + if (ENABLE_DUMP_E2E) list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc") endif (ENABLE_DUMP_E2E) diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc new file mode 100644 index 00000000000..6caf7e2c393 --- /dev/null +++ b/mindspore/ccsrc/debug/common.cc @@ -0,0 +1,125 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "debug/common.h" + +#include +#include +#include "utils/system/env.h" +#include "utils/system/file_system.h" +#include "utils/log_adapter.h" +#include "utils/context/ms_context.h" + +namespace mindspore { +std::optional Common::GetRealPath(const std::string &input_path) { + std::string out_path; + auto path_split_pos = input_path.find_last_of('/'); + if (path_split_pos == std::string::npos) { + path_split_pos = input_path.find_last_of('\\'); + } + // get real path + char real_path[PATH_MAX] = {0}; + if (path_split_pos != std::string::npos) { + std::string prefix_path = input_path.substr(0, path_split_pos); + if (prefix_path.length() >= PATH_MAX) { + MS_LOG(ERROR) << "Prefix path is too longer!"; + return std::nullopt; + } + std::string last_path = input_path.substr(path_split_pos, input_path.length() - path_split_pos); + auto ret = CreateNotExistDirs(prefix_path); + if (!ret) { + MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; + return std::nullopt; + } + + if (nullptr == realpath(prefix_path.c_str(), real_path)) { + MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; + return std::nullopt; + } + out_path = std::string(real_path) + last_path; + } + + if (path_split_pos == std::string::npos) { + if (input_path.length() >= PATH_MAX) { + MS_LOG(ERROR) << "Prefix path is too longer!"; + return std::nullopt; + } + if (nullptr == realpath(input_path.c_str(), real_path)) { + MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created."; + } + out_path = std::string(real_path); + } + return out_path; +} + +bool Common::CreateNotExistDirs(const std::string &path) { + std::shared_ptr fs = system::Env::GetFileSystem(); + MS_EXCEPTION_IF_NULL(fs); + char temp_path[PATH_MAX] = {0}; + if (path.length() > PATH_MAX) { + MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; + return false; + } + for (uint32_t i = 0; i < path.length(); i++) { + temp_path[i] = path[i]; + if (temp_path[i] == '\\' || temp_path[i] == '/') { + if (i != 0) { + char tmp_char = temp_path[i]; + temp_path[i] = '\0'; + std::string path_handle(temp_path); + if (!fs->FileExist(temp_path)) { + MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; + if (!fs->CreateDir(temp_path)) { + MS_LOG(ERROR) << "Create " << path_handle << " dir error"; + return false; + } + } + temp_path[i] = tmp_char; + } + } + } + + if (!fs->FileExist(path)) { + MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; + if (!fs->CreateDir(path)) { + MS_LOG(ERROR) << "Create " << path << " dir error"; + return false; + } + } + return true; +} + +std::optional Common::GetConfigFile(const std::string &env) { + if (env.empty()) { + MS_LOG(EXCEPTION) << "Invalid env"; + } + auto config_path_str = std::getenv(env.c_str()); + if (config_path_str == nullptr) { + MS_LOG(ERROR) << "Please export env:" << env; + return {}; + } + MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << config_path_str; + + std::string dump_config_file(config_path_str); + std::shared_ptr fs = system::Env::GetFileSystem(); + MS_EXCEPTION_IF_NULL(fs); + if (!fs->FileExist(dump_config_file)) { + MS_LOG(ERROR) << dump_config_file << " not exist."; + return {}; + } + return dump_config_file; +} +} // namespace mindspore diff --git a/mindspore/ccsrc/debug/common.h b/mindspore/ccsrc/debug/common.h new file mode 100644 index 00000000000..8d4a6cb4674 --- /dev/null +++ b/mindspore/ccsrc/debug/common.h @@ -0,0 +1,36 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ + +#include +#include +#include "utils/contract.h" + +namespace mindspore { +class Common { + public: + Common() = default; + ~Common() = default; + static std::optional GetRealPath(const std::string &input_path); + static std::optional GetConfigFile(const std::string &env); + + private: + static bool CreateNotExistDirs(const std::string &path); +}; +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_ diff --git a/mindspore/ccsrc/debug/data_dump_parser.cc b/mindspore/ccsrc/debug/data_dump_parser.cc new file mode 100644 index 00000000000..259ec388d32 --- /dev/null +++ b/mindspore/ccsrc/debug/data_dump_parser.cc @@ -0,0 +1,152 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "debug/data_dump_parser.h" + +#include +#include "utils/context/ms_context.h" +#include "debug/common.h" + +constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH"; +constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP"; +constexpr auto kDataDumpPath = "DATA_DUMP_PATH"; +namespace mindspore { +void DataDumpParser::ResetParam() { + enable_ = false; + net_name_.clear(); + dump_mode_ = 0; + dump_step_ = 0; + kernel_set_.clear(); +} + +bool DataDumpParser::DumpEnabled() const { + auto enable_dump = std::getenv(kEnableDataDump); + if (!enable_dump) { + MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP"; + return false; + } + + auto enabled = std::atoi(enable_dump); + if (enabled != 1) { + MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1"; + return false; + } + + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + if (context->execution_mode() == kPynativeMode) { + MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump"; + } + return true; +} + +std::optional DataDumpParser::GetDumpPath() const { + auto dump_path = std::getenv(kDataDumpPath); + if (!dump_path) { + MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH"; + return {}; + } + std::string dump_path_str(dump_path); + return dump_path_str; +} + +void DataDumpParser::ParseDumpConfig() { + std::lock_guard guard(lock_); + MS_LOG(INFO) << "[DataDump] parse start"; + if (!DumpEnabled()) { + MS_LOG(INFO) << "[DataDump] dump not enable"; + return; + } + + ResetParam(); + + auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah); + if (!dump_config_file.has_value()) { + MS_LOG(EXCEPTION) << "[DataDump] Get config file failed"; + } + + std::ifstream json_file(dump_config_file.value()); + if (!json_file.is_open()) { + MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed."; + } + + nlohmann::json j; + json_file >> j; + if (j.find("DumpSettings") == j.end()) { + MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist."; + } + + nlohmann::json dump_settings = j.at("DumpSettings"); + // convert json to string + std::stringstream ss; + ss << dump_settings; + std::string cfg = ss.str(); + MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg; + if (!IsConfigExist(dump_settings)) { + MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid"; + } + + if (!ParseDumpSetting(dump_settings)) { + MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed"; + } +} + +bool DataDumpParser::NeedDump(const std::string &op_full_name) const { + if (!DumpEnabled()) { + return false; + } + if (dump_mode_ == 0) { + return true; + } + auto iter = kernel_set_.find(op_full_name); + return iter != kernel_set_.end(); +} + +bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const { + if (dump_settings.find("mode") == dump_settings.end() || dump_settings.find("net_name") == dump_settings.end() || + dump_settings.find("iteration") == dump_settings.end() || dump_settings.find("kernels") == dump_settings.end()) { + MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist."; + return false; + } + return true; +} + +bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) { + auto mode = dump_settings.at("mode"); + auto net_name = dump_settings.at("net_name"); + auto iteration = dump_settings.at("iteration"); + auto kernels = dump_settings.at("kernels"); + if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) { + MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid."; + enable_ = false; + return false; + } + + enable_ = true; + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + dump_mode_ = mode; + net_name_ = net_name; + dump_step_ = iteration; + for (const auto &kernel : kernels) { + auto kernel_str = kernel.dump(); + kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end()); + MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str; + kernel_set_.insert(kernel_str); + } + return true; +} +} // namespace mindspore diff --git a/mindspore/ccsrc/debug/data_dump_parser.h b/mindspore/ccsrc/debug/data_dump_parser.h new file mode 100644 index 00000000000..751c61dd1a1 --- /dev/null +++ b/mindspore/ccsrc/debug/data_dump_parser.h @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ + +#include +#include +#include +#include +#include "nlohmann/json.hpp" +#include "common/utils.h" + +namespace mindspore { +class DataDumpParser { + public: + static DataDumpParser &GetInstance() { + static DataDumpParser instance; + return instance; + } + void ParseDumpConfig(); + bool NeedDump(const std::string &op_full_name) const; + bool DumpEnabled() const; + std::optional GetDumpPath() const; + bool enable() const { return enable_; } + const std::string &net_name() const { return net_name_; } + uint32_t dump_mode() const { return dump_mode_; } + uint32_t dump_step() const { return dump_step_; } + const std::set &kernel_set() const { return kernel_set_; } + + private: + DataDumpParser() = default; + virtual ~DataDumpParser() = default; + DISABLE_COPY_AND_ASSIGN(DataDumpParser); + + void ResetParam(); + bool IsConfigExist(const nlohmann::json &dump_settings) const; + bool ParseDumpSetting(const nlohmann::json &dump_settings); + + std::mutex lock_; + bool enable_{false}; + std::string net_name_; + uint32_t dump_mode_{0}; + uint32_t dump_step_{0}; + std::set kernel_set_; +}; +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_ diff --git a/mindspore/ccsrc/debug/e2e_dump.cc b/mindspore/ccsrc/debug/e2e_dump.cc index 78a331fc278..9037a6d00b3 100644 --- a/mindspore/ccsrc/debug/e2e_dump.cc +++ b/mindspore/ccsrc/debug/e2e_dump.cc @@ -17,12 +17,14 @@ #include #include #include +#include #include #include "utils/log_adapter.h" #include "utils/system/file_system.h" #include "utils/system/env.h" #include "utils/convert_utils.h" #include "utils/context/ms_context.h" +#include "debug/common.h" using json = nlohmann::json; @@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) return false; } - std::string realpath; - bool ret = GetRealPath(filename, &realpath); - if (!ret) { + auto realpath = Common::GetRealPath(filename); + if (!realpath.has_value()) { MS_LOG(ERROR) << "Get real path failed."; return false; } std::ofstream fd; - fd.open(realpath, std::ios::binary | std::ios::out); + fd.open(realpath.value(), std::ios::binary | std::ios::out); if (!fd.is_open()) { - MS_LOG(ERROR) << "Open file " << realpath << " fail."; + MS_LOG(ERROR) << "Open file " << realpath.value() << " fail."; return false; } (void)fd.write(reinterpret_cast(data), SizeToLong(len)); fd.close(); return true; } - -bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) { - MS_EXCEPTION_IF_NULL(outpath); - auto path_split_pos = inpath.find_last_of('/'); - if (path_split_pos == std::string::npos) { - path_split_pos = inpath.find_last_of('\\'); - } - // get real path - char real_path[PATH_MAX] = {0}; - if (path_split_pos != std::string::npos) { - std::string prefix_path = inpath.substr(0, path_split_pos); - if (prefix_path.length() >= PATH_MAX) { - MS_LOG(ERROR) << "Prefix path is too longer!"; - return false; - } - std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos); - auto ret = CreateNotExistDirs(prefix_path); - if (ret == false) { - MS_LOG(ERROR) << "CreateNotExistDirs Failed!"; - return false; - } - - if (nullptr == realpath(prefix_path.c_str(), real_path)) { - MS_LOG(ERROR) << "dir " << prefix_path << " does not exit."; - return false; - } - *outpath = std::string(real_path) + last_path; - } - - if (path_split_pos == std::string::npos) { - if (inpath.length() >= PATH_MAX) { - MS_LOG(ERROR) << "Prefix path is too longer!"; - return false; - } - if (nullptr == realpath(inpath.c_str(), real_path)) { - MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created."; - } - *outpath = std::string(real_path); - } - - return true; -} - -bool Dump::CreateNotExistDirs(const std::string &path) { - std::shared_ptr fs = system::Env::GetFileSystem(); - MS_EXCEPTION_IF_NULL(fs); - char temp_path[PATH_MAX] = {0}; - if (path.length() > PATH_MAX) { - MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX; - return false; - } - for (uint32_t i = 0; i < path.length(); i++) { - temp_path[i] = path[i]; - if (temp_path[i] == '\\' || temp_path[i] == '/') { - if (i != 0) { - char tmp_char = temp_path[i]; - temp_path[i] = '\0'; - std::string path_handle(temp_path); - if (!fs->FileExist(temp_path)) { - MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating..."; - if (!fs->CreateDir(temp_path)) { - MS_LOG(ERROR) << "Create " << path_handle << " dir error"; - return false; - } - } - temp_path[i] = tmp_char; - } - } - } - - if (!fs->FileExist(path)) { - MS_LOG(INFO) << "Dir " << path << " does not exit, creating..."; - if (!fs->CreateDir(path)) { - MS_LOG(ERROR) << "Create " << path << " dir error"; - return false; - } - } - - return true; -} } // namespace mindspore diff --git a/mindspore/ccsrc/debug/e2e_dump.h b/mindspore/ccsrc/debug/e2e_dump.h index 4c3e8308da7..acde1626cb4 100644 --- a/mindspore/ccsrc/debug/e2e_dump.h +++ b/mindspore/ccsrc/debug/e2e_dump.h @@ -59,10 +59,6 @@ class Dump { uint32_t cur_iter_; std::vector dump_kernels_; - static bool GetRealPath(const std::string &inpath, std::string *outpath); - - static bool CreateNotExistDirs(const std::string &path); - private: bool ParseDumpConfig(const std::string &dump_config_file); bool IsConfigExist(const nlohmann::json &dumpSettings); diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc index 8b176af5fc0..42b1d93dd55 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc @@ -42,6 +42,7 @@ #include "device/ascend/ascend_memory_manager.h" #include "debug/tensor_load.h" +using ge::model_runner::ModelRunner; using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingUtils; using mindspore::device::ascend::tasksink::TaskGenerator; @@ -90,9 +91,16 @@ std::string GetRankId() { AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); } void AscendKernelRuntime::ClearGraphModelMap() { +#ifdef ENABLE_DATA_DUMP + for (auto &iter : graph_data_dumper_) { + MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first; + iter.second->UnloadDumpInfo(); + } + graph_data_dumper_.clear(); +#endif for (auto &iter : graph_model_map_) { MS_LOG(INFO) << "Ge UnloadModel " << iter.first; - auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first); + auto ret = ModelRunner::Instance().UnloadModel(iter.first); if (!ret) { MS_LOG(ERROR) << "UnloadModel failed"; } @@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) { return; } MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first; - auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first); + auto ret = ModelRunner::Instance().UnloadModel(iter->first); if (!ret) { MS_LOG(ERROR) << "UnloadModel failed"; } @@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() { } #endif +#ifdef ENABLE_DATA_DUMP + DataDumpParser::GetInstance().ParseDumpConfig(); +#endif + // Start up profiling before rtSetDevice ret = ProfilingManager::GetInstance().StartupProfiling(device_id_); if (!ret) { @@ -440,7 +452,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { << ", wait_active_stream_list size:" << wait_active_stream_list.size() << ", force_copy_stream_list size:" << force_copy_stream_list.size(); std::vector> empty_list; - std::shared_ptr model = std::make_shared( + auto model = std::make_shared( task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0, 0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)), resource_manager.get_cur_event_num(), 0); @@ -477,21 +489,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { std::shared_ptr listener; MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first; - bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, - model_iter->second, listener); + bool status = + ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener); if (!status) { MS_LOG(EXCEPTION) << "Load Task Failed"; } if (ProfilingManager::GetInstance().IsProfiling()) { - auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first); - auto stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first); + auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first); + auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first); ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph)); } + +#ifdef ENABLE_DATA_DUMP + LaunchDataDump(NOT_NULL(graph)); +#endif + if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) { + MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed"; + return false; + } return true; } +#ifdef ENABLE_DATA_DUMP +void AscendKernelRuntime::LaunchDataDump(NotNull graph) { + if (!DataDumpParser::GetInstance().DumpEnabled()) { + return; + } + auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id()); + auto data_dumper = std::make_shared(graph.get(), runtime_info_map); + MS_EXCEPTION_IF_NULL(data_dumper); + data_dumper->LoadDumpInfo(); + auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper); + if (!ret.second) { + MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed"; + } +} +#endif + void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) { - auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id); + auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id); auto graph_task_names = ProfilingUtils::graph_kernel_name(); auto iter = graph_task_names.find(graph_id); if (iter != graph_task_names.end()) { @@ -524,7 +560,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) { return false; } - bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); + bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors); if (!status) { MS_LOG(ERROR) << "Run task failed"; DebugTaskIdName(graph->graph_id()); diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h index 69ba8b295a8..771c3f8c4f8 100644 --- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h +++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h @@ -24,6 +24,10 @@ #include "framework/ge_runtime/davinci_model.h" #include "device/kernel_runtime_manager.h" #include "session/session_basic.h" +#ifdef ENABLE_DATA_DUMP +#include "debug/data_dump_parser.h" +#include "device/ascend/dump/data_dumper.h" +#endif using ge::model_runner::TaskInfo; using std::unordered_map; @@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime { bool initialized_{false}; unordered_map>> task_map_; unordered_map> graph_model_map_; +#ifdef ENABLE_DATA_DUMP + void LaunchDataDump(NotNull graph); + unordered_map> graph_data_dumper_; +#endif }; MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime); diff --git a/mindspore/ccsrc/device/ascend/dump/data_dumper.cc b/mindspore/ccsrc/device/ascend/dump/data_dumper.cc new file mode 100644 index 00000000000..57ac0e0947d --- /dev/null +++ b/mindspore/ccsrc/device/ascend/dump/data_dumper.cc @@ -0,0 +1,282 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifdef ENABLE_DATA_DUMP +#include "device/ascend/dump/data_dumper.h" + +#include +#include +#include +#include "utility" +#include "session/anf_runtime_algorithm.h" +#include "runtime/mem.h" +#include "runtime/kernel.h" +#include "device/ascend/dump/ge_dump.h" +#include "proto/op_mapping_info.pb.h" +#include "utils/context/ms_context.h" +#include "debug/data_dump_parser.h" + +constexpr uint32_t kAicpuLoadFlag = 1; +constexpr uint32_t kAicpuUnloadFlag = 0; +constexpr uint32_t kTupleTaskId = 0; +constexpr uint32_t kTupleStreamId = 1; +constexpr uint32_t kTupleArgs = 2; +constexpr uint32_t kCurrentStepTensorIndex = 0; +constexpr uint32_t kCurrentEpochTensorIndex = 1; +constexpr uint32_t kStepsPerEpochTensorIndex = 2; + +namespace mindspore { +namespace device { +namespace ascend { +void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull task); +void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull task); +void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr); + +DataDumper::~DataDumper() { + ReleaseDevMem(&dev_load_mem_); + ReleaseDevMem(&dev_unload_mem_); +} + +void DataDumper::LoadDumpInfo() { + MS_LOG(INFO) << "[DataDump] LoadDumpInfo start"; + MS_EXCEPTION_IF_NULL(kernel_graph_); + aicpu::dump::OpMappingInfo dump_info; + SetOpMappingInfo(NOT_NULL(&dump_info)); + + auto kernels = kernel_graph_->execution_order(); + for (const auto &kernel : kernels) { + MS_EXCEPTION_IF_NULL(kernel); + if (!KernelNeedDump(kernel)) { + continue; + } + MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope(); + dump_kernel_names_.emplace_back(kernel->fullname_with_scope()); + + aicpu::dump::Task task; + ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task)); + MS_EXCEPTION_IF_NULL(dump_info.mutable_task()); + dump_info.mutable_task()->Add(std::move(task)); + } + RtLoadDumpData(dump_info, &dev_load_mem_); + load_flag_ = true; + MS_LOG(INFO) << "[DataDump] LoadDumpInfo end"; +} + +void DataDumper::SetOpMappingInfo(NotNull dump_info) const { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + MS_EXCEPTION_IF_NULL(kernel_graph_); + auto dump_path = DataDumpParser::GetInstance().GetDumpPath(); + if (!dump_path.has_value()) { + MS_LOG(EXCEPTION) << "Dump path invalid"; + } + auto device_id = context_ptr->device_id(); + dump_info->set_dump_path(dump_path.value() + "_" + std::to_string(device_id) + "/"); + MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value(); + + dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(kernel_graph_->graph_id())); + dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step())); + dump_info->set_model_id(kernel_graph_->graph_id()); + dump_info->set_flag(kAicpuLoadFlag); + + const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors(); + if (input_ctrl_tensors == nullptr || input_ctrl_tensors->size() < 3) { + MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor"; + return; + } + const auto ¤t_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex); + const auto &currnet_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex); + const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex); + + MS_EXCEPTION_IF_NULL(current_step_tensor); + MS_EXCEPTION_IF_NULL(currnet_epoch_tensor); + MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor); + MS_EXCEPTION_IF_NULL(current_step_tensor->device_address()); + MS_EXCEPTION_IF_NULL(currnet_epoch_tensor->device_address()); + MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address()); + + void *current_step = current_step_tensor->device_address()->ptr_; + void *current_epoch = currnet_epoch_tensor->device_address()->ptr_; + void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_; + + if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) { + dump_info->set_step_id_addr(reinterpret_cast(current_epoch)); + dump_info->set_loop_cond_addr(reinterpret_cast(current_step)); + dump_info->set_iterations_per_loop_addr(reinterpret_cast(steps_per_epoch)); + } else { + MS_LOG(INFO) << "Invalid ctrl tensor device address"; + } +} + +bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const { + if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL && + AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) { + return false; + } + MS_EXCEPTION_IF_NULL(kernel); + const auto &kernel_set = DataDumpParser::GetInstance().kernel_set(); + return kernel_set.find(kernel->fullname_with_scope()) != kernel_set.end(); +} + +void DataDumper::UnloadDumpInfo() { + if (!load_flag_) { + MS_LOG(WARNING) << "Load not success, no need to unload"; + return; + } + MS_EXCEPTION_IF_NULL(kernel_graph_); + MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << kernel_graph_->graph_id(); + + aicpu::dump::OpMappingInfo op_mapping_info; + op_mapping_info.set_model_id(kernel_graph_->graph_id()); + op_mapping_info.set_flag(kAicpuUnloadFlag); + + for (const auto &kernel_name : dump_kernel_names_) { + aicpu::dump::Task task; + auto iter = runtime_info_map_.find(kernel_name); + if (iter == runtime_info_map_.end()) { + MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; + } + MS_EXCEPTION_IF_NULL(iter->second); + auto task_id = std::get(*iter->second); + task.set_task_id(task_id); + MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task()); + op_mapping_info.mutable_task()->Add(std::move(task)); + } + + RtLoadDumpData(op_mapping_info, &dev_unload_mem_); +} + +void DataDumper::ReleaseDevMem(void **ptr) const { + if (ptr == nullptr) { + return; + } + if (*ptr != nullptr) { + rtError_t rt_error = rtFree(*ptr); + if (rt_error != RT_ERROR_NONE) { + MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error; + } + *ptr = nullptr; + } +} + +void DataDumper::ConstructDumpTask(NotNull kernel, NotNull dump_task) const { + dump_task->set_end_graph(false); + auto iter = runtime_info_map_.find(kernel->fullname_with_scope()); + if (iter == runtime_info_map_.end()) { + MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map"; + } + MS_EXCEPTION_IF_NULL(iter->second); + auto task_id = std::get(*iter->second); + auto stream_id = std::get(*iter->second); + auto args = std::get(*iter->second); + MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id; + + dump_task->set_task_id(task_id); + dump_task->set_stream_id(stream_id); + MS_EXCEPTION_IF_NULL(dump_task->mutable_op()); + dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope()); + dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get())); + + DumpKernelOutput(kernel, args, dump_task); + DumpKernelInput(kernel, args, dump_task); +} + +void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) { + std::string proto_str; + size_t proto_size = dump_info.ByteSizeLong(); + bool ret = dump_info.SerializeToString(&proto_str); + if (!ret || proto_size == 0) { + MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size %zu."; + } + + rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed"; + } + + if (ptr == nullptr) { + MS_LOG(ERROR) << "[DataDump] rtMalloc failed, ptr is nullptr"; + return; + } + rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed"; + } + + MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start"; + rt_ret = rtDatadumpInfoLoad(*ptr, proto_size); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed"; + } +} + +void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull task) { + MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope(); + auto input_size = AnfAlgo::GetInputTensorNum(kernel); + auto output_size = AnfAlgo::GetOutputTensorNum(kernel); + uint64_t offset = sizeof(void *) * input_size; + for (size_t i = 0; i < output_size; ++i) { + auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); + auto output_format = AnfAlgo::GetOutputFormat(kernel, i); + auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i); + + aicpu::dump::Output output; + output.set_data_type(GetGeDataType(data_type)); + output.set_format(GetGeFormat(output_format, output_shape.size())); + MS_EXCEPTION_IF_NULL(output.mutable_shape()); + for (auto dim : output_shape) { + output.mutable_shape()->add_dim(dim); + } + output.set_original_output_format(GetGeFormat(output_format, output_shape.size())); + output.set_address(static_cast(reinterpret_cast(args)) + offset); + MS_EXCEPTION_IF_NULL(task->mutable_output()); + task->mutable_output()->Add(std::move(output)); + offset += sizeof(void *); + } +} + +void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull task) { + MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope(); + auto input_size = AnfAlgo::GetInputTensorNum(kernel); + uint64_t offset = 0; + for (size_t i = 0; i < input_size; ++i) { + aicpu::dump::Input input; + auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i); + auto input_node = input_node_with_index.first; + auto input_index = input_node_with_index.second; + std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index); + auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index); + if (output_type == kTypeUnknown) { + MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph"; + output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index); + } + auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index); + + input.set_data_type(GetGeDataType(output_type)); + input.set_format(GetGeFormat(output_format, output_shape.size())); + MS_EXCEPTION_IF_NULL(input.mutable_shape()); + for (auto dim : output_shape) { + input.mutable_shape()->add_dim(dim); + } + input.set_address(static_cast(reinterpret_cast(args)) + offset); + MS_EXCEPTION_IF_NULL(task->mutable_input()); + task->mutable_input()->Add(std::move(input)); + offset += sizeof(void *); + } +} +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif diff --git a/mindspore/ccsrc/device/ascend/dump/data_dumper.h b/mindspore/ccsrc/device/ascend/dump/data_dumper.h new file mode 100644 index 00000000000..65b01c61c45 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/dump/data_dumper.h @@ -0,0 +1,69 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ +#ifdef ENABLE_DATA_DUMP +#include +#include +#include +#include +#include +#include "session/kernel_graph.h" + +namespace aicpu { +namespace dump { +class OpMappingInfo; +class Task; +} // namespace dump +} // namespace aicpu +namespace mindspore { +namespace device { +namespace ascend { +// tuple(op_name, task_id, stream_id, args) +using RuntimeInfo = std::tuple; +class DataDumper { + public: + DataDumper(const session::KernelGraph *kernel_graph, + const std::map> &runtime_info_map) + : load_flag_(false), + dev_load_mem_(nullptr), + dev_unload_mem_(nullptr), + kernel_graph_(kernel_graph), + runtime_info_map_(runtime_info_map) {} + ~DataDumper(); + void LoadDumpInfo(); + + void UnloadDumpInfo(); + + private: + void ReleaseDevMem(void **ptr) const; + bool KernelNeedDump(const CNodePtr &kernel) const; + void SetOpMappingInfo(NotNull dump_info) const; + void ConstructDumpTask(NotNull kernel, NotNull dump_task) const; + + bool load_flag_; + void *dev_load_mem_; + void *dev_unload_mem_; + std::vector dump_kernel_names_; + const session::KernelGraph *kernel_graph_; + std::map> runtime_info_map_; +}; +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_ diff --git a/mindspore/ccsrc/device/ascend/dump/ge_dump.h b/mindspore/ccsrc/device/ascend/dump/ge_dump.h new file mode 100644 index 00000000000..eae70c4b0b7 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/dump/ge_dump.h @@ -0,0 +1,120 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ +#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ + +#include +#include +#include "proto/ge_dtype.pb.h" +#include "ir/dtype/type_id.h" +#include "utils/utils.h" + +namespace mindspore { +namespace device { +namespace ascend { +static ge::proto::DataType GetGeDataType(TypeId type_id) { + static const std::map data_type_map = { + {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED}, {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT}, + {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::proto::DT_INT8}, + {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8}, {TypeId::kNumberTypeInt16, ge::proto::DT_INT16}, + {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16}, {TypeId::kNumberTypeInt32, ge::proto::DT_INT32}, + {TypeId::kNumberTypeInt64, ge::proto::DT_INT64}, {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32}, + {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64}, {TypeId::kNumberTypeBool, ge::proto::DT_BOOL}, + {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE}, + }; + MS_LOG(INFO) << "Vm origin type_id:" << type_id; + auto iter = data_type_map.find(type_id); + if (iter == data_type_map.end()) { + MS_LOG(EXCEPTION) << "Invalid data type:" << type_id; + } + return iter->second; +} + +enum GeFormat { + kFormat_NCHW = 0, // NCHW + kFormat_NHWC, // NHWC + kFormat_ND, // Nd Tensor + kFormat_NC1HWC0, // NC1HWC0 + kFormat_FRACTAL_Z, // FRACTAL_Z + kFormat_NC1C0HWPAD, + kFormat_NHWC1C0, + kFormat_FSR_NCHW, + kFormat_FRACTAL_DECONV, + kFormat_C1HWNC0, + kFormat_FRACTAL_DECONV_TRANSPOSE, + kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS, + kFormat_NC1HWC0_C04, // NC1HWC0, C0 =4 + kFormat_FRACTAL_Z_C04, // FRACZ, C0 =4 + kFormat_CHWN, + kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS, + kFormat_HWCN, + kFormat_NC1KHKWHWC0, // KH,KW kernel h& kernel w maxpooling max output format + kFormat_BN_WEIGHT, + kFormat_FILTER_HWCK, // filter input tensor format + kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20, + kFormat_HASHTABLE_LOOKUP_KEYS, + kFormat_HASHTABLE_LOOKUP_VALUE, + kFormat_HASHTABLE_LOOKUP_OUTPUT, + kFormat_HASHTABLE_LOOKUP_HITS = 24, + kFormat_C1HWNCoC0, + kFormat_MD, + kFormat_NDHWC, + kFormat_FRACTAL_ZZ, + kFormat_FRACTAL_NZ, + kFormat_NCDHW, + kFormat_DHWCN, // 3D filter input tensor format + kFormat_NDC1HWC0, + kFormat_FRACTAL_Z_3D, + kFormat_CN, + kFormat_NC, + kFormat_DHWNC, + kFormat_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format + kFormat_RESERVED, + kFormat_ALL +}; + +static GeFormat GetGeFormat(const std::string &format, size_t shape_size) { + static const std::map format_map = { + // default format: nchw, fractal_nz? + {kOpFormat_DEFAULT, kFormat_NCHW}, + {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0}, + {kOpFormat_ND, kFormat_ND}, + {kOpFormat_NCHW, kFormat_NCHW}, + {kOpFormat_NHWC, kFormat_NHWC}, + {kOpFormat_HWCN, kFormat_HWCN}, + {kOpFormat_NC1HWC0, kFormat_NC1HWC0}, + {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z}, + {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ}, + {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0}, + {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04}, + {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04}, + {kOpFormat_NDHWC, kFormat_NDHWC}, + }; + MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size; + if (format == kOpFormat_DEFAULT) { + return shape_size == 4 ? kFormat_NCHW : kFormat_ND; + } + auto iter = format_map.find(format); + if (iter == format_map.end()) { + MS_LOG(EXCEPTION) << "Invalid format:" << format; + } + return iter->second; +} +} // namespace ascend +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_ diff --git a/mindspore/ccsrc/device/ascend/dump/proto/ge_dtype.proto b/mindspore/ccsrc/device/ascend/dump/proto/ge_dtype.proto new file mode 100644 index 00000000000..7c690524d9d --- /dev/null +++ b/mindspore/ccsrc/device/ascend/dump/proto/ge_dtype.proto @@ -0,0 +1,49 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; + +package ge.proto; + +enum DataType +{ + DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. + DT_FLOAT = 1; // float type + DT_FLOAT16 = 2; // fp16 type + DT_INT8 = 3; // int8 type + DT_UINT8 = 4; // uint8 type + DT_INT16 = 5; // int16 type + DT_UINT16 = 6; // uint16 type + DT_INT32 = 7; // + DT_INT64 = 8; // int64 type + DT_UINT32 = 9; // unsigned int32 + DT_UINT64 = 10; // unsigned int64 + DT_BOOL = 11; // bool type + DT_DOUBLE = 12; // double type + DT_STRING = 13; // string type + DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ + DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ + DT_COMPLEX64 = 16; // complex64 type + DT_COMPLEX128 = 17; // complex128 type + DT_QINT8 = 18; // qint8 type + DT_QINT16 = 19; // qint16 type + DT_QINT32 = 20; // qint32 type + DT_QUINT8 = 21; // quint8 type + DT_QUINT16 = 22; // quint16 type + DT_RESOURCE = 23; // resource type + DT_STRING_REF = 24; // string_ref type + DT_DUAL = 25; /**< dual output type */ +} \ No newline at end of file diff --git a/mindspore/ccsrc/device/ascend/dump/proto/op_mapping_info.proto b/mindspore/ccsrc/device/ascend/dump/proto/op_mapping_info.proto new file mode 100644 index 00000000000..d3377c655d7 --- /dev/null +++ b/mindspore/ccsrc/device/ascend/dump/proto/op_mapping_info.proto @@ -0,0 +1,78 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; +package aicpu.dump; + +message Shape { + repeated uint64 dim = 1; +} + +message Output { + int32 data_type = 1; + int32 format = 2; + Shape shape = 3; + uint64 address = 4; + string original_name = 5; + int32 original_output_index = 6; + int32 original_output_data_type = 7; + int32 original_output_format = 8; + uint64 size = 9; +}; + +message Input { + int32 data_type = 1; + int32 format = 2; + Shape shape = 3; + uint64 address = 4; + uint64 size = 5; +} + +message Op { + string op_name = 1; + string op_type = 2; +}; + +message Task { + uint32 task_id = 1; + uint32 stream_id = 2; + Op op = 3; + repeated Output output = 4; + bool end_graph = 5; + repeated Input input = 6; +}; + +message OpMappingInfo { + string dump_path = 1; + oneof model_name_param { + string model_name = 2; + } + oneof model_id_param { + uint32 model_id = 3; + } + oneof step_id { + uint64 step_id_addr = 4; + } + oneof iterations_per_loop { + uint64 iterations_per_loop_addr = 5; + } + oneof loop_cond { + uint64 loop_cond_addr = 6; + } + uint32 flag = 7; // 0x01 load, 0x00 unload + repeated Task task = 8; + string dump_step = 9; +}; diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc index e026459ae97..00489c72990 100644 --- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc +++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc @@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i AddressPtrList kernel_outputs; auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr); MS_EXCEPTION_IF_NULL(kernel_mod); + kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope()); if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) { for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) { auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i); diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h index f4597f6f460..879caf45fc1 100644 --- a/mindspore/ccsrc/device/device_address.h +++ b/mindspore/ccsrc/device/device_address.h @@ -34,6 +34,7 @@ class CPUKernelRuntime; namespace ascend { class AscendKernelRuntime; class AscendMemoryManager; +class DataDumper; namespace tasksink { class TaskGenerator; } // namespace tasksink @@ -90,6 +91,7 @@ class DeviceAddress { friend class mindspore::device::gpu::GPUMemoryManager; friend class mindspore::device::ascend::AscendKernelRuntime; friend class mindspore::device::ascend::AscendMemoryManager; + friend class mindspore::device::ascend::DataDumper; }; using DeviceAddressPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc index fd0a8eb967b..86dcf2b4498 100644 --- a/mindspore/ccsrc/device/kernel_adjust.cc +++ b/mindspore/ccsrc/device/kernel_adjust.cc @@ -34,6 +34,7 @@ #include "device/ascend/kernel_select_ascend.h" #include "runtime/base.h" #include "device/ascend/ascend_stream_assign.h" + namespace mindspore { namespace device { using device::ascend::ProfilingUtils; @@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr std::vector *mute_inputs = kernel_graph_ptr->MutableInputs(); MS_EXCEPTION_IF_NULL(mute_inputs); mute_inputs->push_back(switch_loop_input[kLoopCountParamName]); + mute_inputs->push_back(switch_loop_input[kEpochParamName]); mute_inputs->push_back(switch_loop_input[kIterLoopParamName]); mute_inputs->push_back(switch_loop_input[kZeroParamName]); mute_inputs->push_back(switch_loop_input[kOneParamName]); @@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptrset_abstract(paremeter_abstract_ptr); ParameterPtr one_new = kernel_graph_ptr->NewParameter(one); (*switch_loop_input)[kOneParamName] = one_new; + + ParameterPtr epoch = std::make_shared(kernel_graph_ptr); + MS_EXCEPTION_IF_NULL(epoch); + epoch->set_name(kEpochParamName); + epoch->set_abstract(paremeter_abstract_ptr); + ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch); + (*switch_loop_input)[kEpochParamName] = epoch_new; } kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBuilder( @@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { *val = 0; inputs->push_back(loop_count_tensor); + // Epoch in device + tensor::TensorPtr epoch_tensor = std::make_shared(kInt32->type_id(), shp); + MS_EXCEPTION_IF_NULL(epoch_tensor); + val = static_cast(epoch_tensor->data_c()); + MS_EXCEPTION_IF_NULL(val); + *val = 0; + inputs->push_back(epoch_tensor); + tensor::TensorPtr iter_loop_tensor = std::make_shared(kInt32->type_id(), shp); MS_EXCEPTION_IF_NULL(iter_loop_tensor); val = static_cast(iter_loop_tensor->data_c()); @@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector *inputs) { MS_EXCEPTION_IF_NULL(val); *val = 1; inputs->push_back(one_tensor); + MS_LOG(INFO) << "---------------- LoadSwitchInputs End--"; } diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/device/kernel_adjust.h index bf3ba2acb25..9f59c486bc3 100644 --- a/mindspore/ccsrc/device/kernel_adjust.h +++ b/mindspore/ccsrc/device/kernel_adjust.h @@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count"; constexpr auto kIterLoopParamName = "iter_loop"; constexpr auto kZeroParamName = "zero"; constexpr auto kOneParamName = "one"; +constexpr auto kEpochParamName = "loop_epoch"; constexpr auto kStreamNeedActivedFirst = "stream_need_active_first"; constexpr uint32_t kSecondStreamSwitchLabel = 2; diff --git a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc index 2213f176cc4..c6d8a101cde 100644 --- a/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc +++ b/mindspore/ccsrc/kernel/aicpu/aicpu_kernel_mod.cc @@ -26,6 +26,7 @@ #include "kernel/aicpu/aicpu_kernel_build.h" #include "utils/convert_utils.h" #include "kernel/aicpu/aicpu_util.h" +#include "utils/context/ms_context.h" using AicpuTaskInfoPtr = std::shared_ptr; @@ -144,8 +145,9 @@ std::vector AicpuOpKernelMod::GenTask(const std::vector if (node_name_ == kTopK) { node_name_ = kTopKV2; } + AicpuTaskInfoPtr task_info_ptr = make_shared( - stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs); + kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump()); MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; return {task_info_ptr}; diff --git a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc index 69fc82aad39..101a9f79b6c 100644 --- a/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc +++ b/mindspore/ccsrc/kernel/akg/ascend/akg_ascend_kernel_mod.cc @@ -26,6 +26,7 @@ #include "runtime/rt.h" #include "utils/log_adapter.h" #include "utils/convert_utils.h" +#include "utils/context/ms_context.h" namespace mindspore { namespace kernel { @@ -123,8 +124,8 @@ std::vector AkgKernelMod::GenTask(const std::vector &in MS_LOG(DEBUG) << "The block_dim is:" << block_dim; TbeTaskInfoPtr task_info_ptr = make_shared( - stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs, - output_data_addrs, workspace_addrs); + kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, + input_data_addrs, output_data_addrs, workspace_addrs, NeedDump()); return {task_info_ptr}; } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/ascend_kernel_mod.h b/mindspore/ccsrc/kernel/ascend_kernel_mod.h index 0aee881f7d6..1ca1dbacc89 100644 --- a/mindspore/ccsrc/kernel/ascend_kernel_mod.h +++ b/mindspore/ccsrc/kernel/ascend_kernel_mod.h @@ -21,6 +21,9 @@ #include #include "framework/ge_runtime/task_info.h" #include "kernel/kernel.h" +#ifdef ENABLE_DATA_DUMP +#include "debug/data_dump_parser.h" +#endif using TaskInfoPtr = std::shared_ptr; namespace mindspore { @@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod { const std::vector &, uint32_t) = 0; uint32_t block_dim() { return block_dim_; } uint32_t stream_id() { return stream_id_; } + virtual bool NeedDump() { +#ifdef ENABLE_DATA_DUMP + return DataDumpParser::GetInstance().NeedDump(kernel_name_); +#else + return false; +#endif + } protected: uint32_t block_dim_{1}; diff --git a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc b/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc index 87fb8d743d5..d5d6e556980 100644 --- a/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc +++ b/mindspore/ccsrc/kernel/hccl/hccl_kernel.cc @@ -18,6 +18,7 @@ #include "device/ascend/tasksink/runtime_utils.h" #include "session/anf_runtime_algorithm.h" #include "utils/utils.h" +#include "utils/context/ms_context.h" using HcclTaskInfoPtr = std::shared_ptr; using ge::model_runner::HcclTaskInfo; @@ -146,10 +147,12 @@ std::vector HcclKernel::GenTask(const std::vector &inpu << ", root_id=" << root_id_ << ", op_type=" << static_cast(op_type_) << ", data_type=" << static_cast(data_type); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); HcclTaskInfoPtr task_info_ptr = std::make_shared( - stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr, - hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel, - RuntimeUtils::HcomDistribute); + kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, + private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, + RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump()); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/kernel.h b/mindspore/ccsrc/kernel/kernel.h index 7bccce49c3a..a15f6b16e7a 100644 --- a/mindspore/ccsrc/kernel/kernel.h +++ b/mindspore/ccsrc/kernel/kernel.h @@ -129,6 +129,10 @@ class KernelMod { virtual std::vector GenParameters() { return {}; } virtual ~KernelMod() = default; + void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; } + + protected: + std::string kernel_name_; }; using KernelModPtr = std::shared_ptr; } // namespace kernel diff --git a/mindspore/ccsrc/kernel/rts/assign.cc b/mindspore/ccsrc/kernel/rts/assign.cc index 7f214b6e6f6..7038004898d 100644 --- a/mindspore/ccsrc/kernel/rts/assign.cc +++ b/mindspore/ccsrc/kernel/rts/assign.cc @@ -58,8 +58,9 @@ std::vector AssignKernel::GenTask(const std::vector &in } stream_id_ = stream_id; - std::shared_ptr task_info_ptr = std::make_shared( - stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, + inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/label_goto.cc b/mindspore/ccsrc/kernel/rts/label_goto.cc index 7bcf42a210d..1d29bb4f358 100644 --- a/mindspore/ccsrc/kernel/rts/label_goto.cc +++ b/mindspore/ccsrc/kernel/rts/label_goto.cc @@ -55,7 +55,8 @@ std::vector LabelGotoKernel::GenTask(const std::vector const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id; std::vector task_info_list; - std::shared_ptr task_info_ptr = std::make_shared(stream_id, label_); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, label_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/label_set.cc b/mindspore/ccsrc/kernel/rts/label_set.cc index 5aedd012dc6..4266e2b0af8 100644 --- a/mindspore/ccsrc/kernel/rts/label_set.cc +++ b/mindspore/ccsrc/kernel/rts/label_set.cc @@ -55,7 +55,7 @@ std::vector LabelSetKernel::GenTask(const std::vector & const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id; std::vector task_info_list; - std::shared_ptr task_info_ptr = std::make_shared(stream_id, label_); + std::shared_ptr task_info_ptr = std::make_shared(kernel_name_, stream_id, label_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/label_switch.cc b/mindspore/ccsrc/kernel/rts/label_switch.cc index fb1ad1601a4..bc5282b4af5 100644 --- a/mindspore/ccsrc/kernel/rts/label_switch.cc +++ b/mindspore/ccsrc/kernel/rts/label_switch.cc @@ -67,7 +67,7 @@ std::vector LabelSwitchKernel::GenTask(const std::vector task_info_list; cond_ = inputs[0]->addr; - auto task_info_ptr = std::make_shared(stream_id, label_size_, label_list_, cond_); + auto task_info_ptr = std::make_shared(kernel_name_, stream_id, label_size_, label_list_, cond_); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); return task_info_list; diff --git a/mindspore/ccsrc/kernel/rts/memcpy_async.cc b/mindspore/ccsrc/kernel/rts/memcpy_async.cc index f5fbec6e56d..ea33c4dd8b5 100644 --- a/mindspore/ccsrc/kernel/rts/memcpy_async.cc +++ b/mindspore/ccsrc/kernel/rts/memcpy_async.cc @@ -23,6 +23,7 @@ #include "common/utils.h" #include "session/anf_runtime_algorithm.h" #include "common/trans.h" +#include "utils/context/ms_context.h" using ge::model_runner::MemcpyAsyncTaskInfo; using MemcpyAsyncTaskInfoPtr = std::shared_ptr; @@ -118,8 +119,9 @@ std::vector MemCpyAsyncKernel::GenTask(const std::vector task_info_ptr = std::make_shared( - stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, + inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump()); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc b/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc index ff005f399bd..0161e8562a3 100644 --- a/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc +++ b/mindspore/ccsrc/kernel/rts/profiling_kernel_mod.cc @@ -63,7 +63,7 @@ std::vector ProfilingKernelMod::GenTask(const std::vector task_info_ptr = - std::make_shared(stream_id, log_id_, notify_, flags_); + std::make_shared(kernel_name_, stream_id, log_id_, notify_, flags_); return {task_info_ptr}; } } // namespace kernel diff --git a/mindspore/ccsrc/kernel/rts/recv.cc b/mindspore/ccsrc/kernel/rts/recv.cc index c195fd1c92e..3fb2fd6bb57 100644 --- a/mindspore/ccsrc/kernel/rts/recv.cc +++ b/mindspore/ccsrc/kernel/rts/recv.cc @@ -60,7 +60,7 @@ std::vector RecvKernel::GenTask(const std::vector &, co const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id; stream_id_ = stream_id; - EventWaitTaskInfoPtr task_info_ptr = std::make_shared(stream_id, event_id_); + EventWaitTaskInfoPtr task_info_ptr = std::make_shared(kernel_name_, stream_id, event_id_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/send.cc b/mindspore/ccsrc/kernel/rts/send.cc index ccdd43ebb61..298d75befda 100644 --- a/mindspore/ccsrc/kernel/rts/send.cc +++ b/mindspore/ccsrc/kernel/rts/send.cc @@ -57,7 +57,7 @@ std::vector SendKernel::GenTask(const std::vector &, co const std::vector &, uint32_t stream_id) { MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id; stream_id_ = stream_id; - EventRecordTaskInfoPtr task_info_ptr = std::make_shared(stream_id, event_id_); + EventRecordTaskInfoPtr task_info_ptr = std::make_shared(kernel_name_, stream_id, event_id_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/rts/stream_active.cc b/mindspore/ccsrc/kernel/rts/stream_active.cc index 4f0895a0be2..b5739648685 100644 --- a/mindspore/ccsrc/kernel/rts/stream_active.cc +++ b/mindspore/ccsrc/kernel/rts/stream_active.cc @@ -72,7 +72,8 @@ std::vector StreamActiveKernel::GenTask(const std::vector task_info_list; for (auto &index : active_streams_index_) { - std::shared_ptr task_info_ptr = std::make_shared(stream_id, index); + std::shared_ptr task_info_ptr = + std::make_shared(kernel_name_, stream_id, index); MS_EXCEPTION_IF_NULL(task_info_ptr); task_info_list.emplace_back(task_info_ptr); MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index; diff --git a/mindspore/ccsrc/kernel/rts/stream_switch.cc b/mindspore/ccsrc/kernel/rts/stream_switch.cc index bab6b043669..44b0a1ef867 100644 --- a/mindspore/ccsrc/kernel/rts/stream_switch.cc +++ b/mindspore/ccsrc/kernel/rts/stream_switch.cc @@ -91,8 +91,8 @@ std::vector StreamSwitchKernel::GenTask(const std::vectoraddr; MS_LOG(INFO) << "cond_:" << static_cast(cond_) << ", true_stream_index_:" << true_stream_index_ << ", stream_id:" << stream_id; - std::shared_ptr task_info_ptr = - std::make_shared(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); + std::shared_ptr task_info_ptr = std::make_shared( + kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_); MS_EXCEPTION_IF_NULL(task_info_ptr); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc index 0f377940daf..9d5222659ab 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_mod.cc @@ -17,7 +17,7 @@ #include "kernel/tbe/tbe_kernel_mod.h" #include #include "runtime/rt.h" -#include "nlohmann/json.hpp" +#include "utils/context/ms_context.h" #include "graphengine/inc/framework/ge_runtime/task_info.h" namespace mindspore { @@ -99,9 +99,9 @@ std::vector TbeKernelMod::GenTask(const std::vector &in MS_LOG(INFO) << "block_dim is:" << block_dim_; - TbeTaskInfoPtr task_info_ptr = - make_shared(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, - meta_data, input_data_addrs, output_data_addrs, workspace_addrs); + TbeTaskInfoPtr task_info_ptr = make_shared( + kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs, + output_data_addrs, workspace_addrs, NeedDump()); return {task_info_ptr}; } diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/session/kernel_graph.h index 6861d43de0d..2e46cfa76ad 100644 --- a/mindspore/ccsrc/session/kernel_graph.h +++ b/mindspore/ccsrc/session/kernel_graph.h @@ -36,7 +36,7 @@ namespace session { using AnfWithOutIndex = std::pair; class KernelGraph : public FuncGraph { public: - KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) { + KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) { inputs_ = std::make_shared>(); execution_order_ = {}; executable_ = true; @@ -154,6 +154,8 @@ class KernelGraph : public FuncGraph { AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const; void AddFinalOutputKernel(const AnfNodePtr &node); bool IsFinalOutputKernel(const AnfNodePtr &node) const; + uint32_t current_epoch() const { return current_epoch_; } + void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; } private: // remove value node form graph @@ -216,6 +218,7 @@ class KernelGraph : public FuncGraph { std::unordered_map front_to_internal_outputs_map_; std::unordered_map internal_outputs_to_front_map_; std::set final_output_kernels_; + uint32_t current_epoch_; }; } // namespace session using KernelGraphPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc index 4cc01e62a41..9e437673c92 100644 --- a/mindspore/ccsrc/session/session_basic.cc +++ b/mindspore/ccsrc/session/session_basic.cc @@ -187,6 +187,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr &graph, std::vecto // set loop_count to zero MS_EXCEPTION_IF_NULL(inputs); inputs->push_back(tensor); + + auto epoch_tensor = (*inputs_params)[1]; + MS_EXCEPTION_IF_NULL(epoch_tensor); + auto *epoch_val = static_cast(epoch_tensor->data_c()); + MS_EXCEPTION_IF_NULL(epoch_val); + *epoch_val = graph->current_epoch(); + epoch_tensor->set_dirty(true); + inputs->push_back(epoch_tensor); + MS_LOG(INFO) << "Load epoch_val:" << *epoch_val; + + graph->set_current_epoch(graph->current_epoch() + 1); + return inputs_params->size(); } @@ -814,13 +826,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector ¶ void SessionBasic::LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const { std::vector inputs(inputs_const); - size_t input_ctrl_size = 1; + size_t input_ctrl_size = 2; MS_EXCEPTION_IF_NULL(kernel_graph); if (kernel_graph->input_ctrl_tensors()) { input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs); } auto input_nodes = kernel_graph->inputs(); - if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) { + if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) { MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size() << ", input_ctrl_size:" << input_ctrl_size; } diff --git a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc index a3a991247cc..9b48adb574c 100644 --- a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc +++ b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc @@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint bool ModelRunner::UnloadModel(uint32_t model_id) { return true; } +bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; } + bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) { return true; } @@ -45,6 +47,11 @@ const std::vector &ModelRunner::GetStreamIdList(uint32_t model_id) con static std::vector stream_id_list; return stream_id_list; } + +const std::map> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const { + static std::map> runtime_info_map; + return runtime_info_map; +} } // namespace model_runner } // namespace ge diff --git a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc index a6ec3a50b5c..8c00e518c3b 100755 --- a/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc +++ b/tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc @@ -15,7 +15,6 @@ */ #include "device/ascend/ascend_stream_assign.h" #include "device/ascend/ascend_label_assign.h" -#include "device/ascend/tasksink/task_generator.h" #include "device/kernel_adjust.h" namespace mindspore { @@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull &graph_ptr) void AscendStreamAssign::GetWaitStreams(vector *wait_active_stream_list) { return; } void AscendStreamAssign::GetHcomStreams(std::vector *streams) { return; } - -namespace tasksink { -bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::vector *const task_info_list, - uint32_t graph_id) { - return true; -} -} // namespace tasksink } // namespace ascend void KernelAdjust::InsertSwitchLoop(const std::shared_ptr &kernel_graph_ptr) { return; } bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr &kernel_graph_ptr) { return true; } diff --git a/tests/ut/cpp/stub/tasksink/task_sink_stub.cc b/tests/ut/cpp/stub/tasksink/task_sink_stub.cc new file mode 100644 index 00000000000..b4318488c0c --- /dev/null +++ b/tests/ut/cpp/stub/tasksink/task_sink_stub.cc @@ -0,0 +1,30 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "device/ascend/tasksink/task_generator.h" + +namespace mindspore { +namespace device { +namespace ascend { +namespace tasksink { +bool TaskGenerator::GenTasks(const std::vector &anf_node_list, std::vector *const task_info_list, + uint32_t graph_id) { + return true; +} +} // namespace tasksink +} // namespace ascend +} // namespace device +} // namespace mindspore \ No newline at end of file