forked from mindspore-Ecosystem/mindspore
Async Data Dump
This commit is contained in:
parent
da9452ee5e
commit
c577952c9a
14
build.sh
14
build.sh
|
@ -24,7 +24,7 @@ usage()
|
|||
{
|
||||
echo "Usage:"
|
||||
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
|
||||
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
|
||||
echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
|
||||
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
|
@ -48,6 +48,7 @@ usage()
|
|||
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
|
||||
echo " -Q Enable dump memory, default off"
|
||||
echo " -D Enable dumping of function graph ir, default on"
|
||||
echo " -S Enable async data dump, default off"
|
||||
echo " -z Compile dataset & mindrecord, default on"
|
||||
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
|
||||
echo " -V Specify the minimum required cuda version, default CUDA 10.1"
|
||||
|
@ -88,6 +89,7 @@ checkopts()
|
|||
ENABLE_TIMELINE="off"
|
||||
ENABLE_DUMP2PROTO="on"
|
||||
ENABLE_DUMPE2E="off"
|
||||
ENABLE_DATA_DUMP="off"
|
||||
ENABLE_DUMP_IR="on"
|
||||
COMPILE_MINDDATA="on"
|
||||
ENABLE_MPI="off"
|
||||
|
@ -102,7 +104,7 @@ checkopts()
|
|||
ENABLE_PYTHON="on"
|
||||
|
||||
# Process the options
|
||||
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt
|
||||
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
|
||||
do
|
||||
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
|
||||
case "${opt}" in
|
||||
|
@ -218,6 +220,11 @@ checkopts()
|
|||
ENABLE_DUMPE2E="$OPTARG"
|
||||
echo "enable dump end to end"
|
||||
;;
|
||||
S)
|
||||
check_on_off $OPTARG S
|
||||
ENABLE_DATA_DUMP="$OPTARG"
|
||||
echo "enable data dump"
|
||||
;;
|
||||
D)
|
||||
check_on_off $OPTARG D
|
||||
ENABLE_DUMP_IR="$OPTARG"
|
||||
|
@ -321,6 +328,9 @@ build_mindspore()
|
|||
if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
|
||||
fi
|
||||
if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
|
||||
fi
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
|
||||
if [[ "X$ENABLE_MPI" = "Xon" ]]; then
|
||||
|
|
|
@ -116,6 +116,10 @@ if(ENABLE_DUMP_E2E)
|
|||
add_compile_definitions(ENABLE_DUMP_E2E)
|
||||
endif()
|
||||
|
||||
# When the ENABLE_DATA_DUMP option is on, add ENABLE_DATA_DUMP as a compile
# definition so sources guarded by `#ifdef ENABLE_DATA_DUMP` are compiled in.
if(ENABLE_DATA_DUMP)
|
||||
add_compile_definitions(ENABLE_DATA_DUMP)
|
||||
endif()
|
||||
|
||||
if(ENABLE_DEBUGGER)
|
||||
add_compile_definitions(ENABLE_DEBUGGER)
|
||||
endif()
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"DumpSettings": {
|
||||
"net_name": "ResNet50",
|
||||
"mode": 1,
|
||||
"iteration": 0,
|
||||
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
|
||||
},
|
||||
|
||||
"DumpSettingsSpec": {
|
||||
"net_name": "net name, e.g. ResNet50",
|
||||
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
|
||||
"iteration": "the iteration to dump",
|
||||
"kernels": "full scope names of the ops that need to be dumped"
|
||||
}
|
||||
}
|
|
@ -1 +1 @@
|
|||
Subproject commit 1c2672868fda8b1d012c99e5aca73725ac869ba9
|
||||
Subproject commit 18cf690152add623ffbddfbbb4674d1b34484ca7
|
|
@ -109,8 +109,12 @@ if (ENABLE_D)
|
|||
file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
|
||||
ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
|
||||
|
||||
# Generate protobuf sources for the Ascend async-dump protos.
# The generated header list goes into its own DUMP_PROTOHDRS variable (mirroring
# PREDICT_PROTOSRCS/PREDICT_PROTOHDRS) instead of overwriting the generic
# PROTOHDRS name, which is presumably set by an earlier ms_protobuf_generate
# call outside this hunk -- TODO confirm nothing later reads PROTOHDRS expecting
# the dump headers.
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/dump/proto/*.proto")
ms_protobuf_generate(DUMP_PROTOSRCS DUMP_PROTOHDRS ${PROTO_DUMP})
|
||||
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
|
||||
|
||||
add_compile_definitions(ENABLE_D)
|
||||
endif ()
|
||||
|
|
|
@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER)
|
|||
)
|
||||
endif (ENABLE_DEBUGGER)
|
||||
|
||||
# ENABLE_D builds always compile the shared debug helpers (common.cc); the
# async data-dump config parser is added only when ENABLE_DATA_DUMP is set too.
if (ENABLE_D)
    list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/common.cc")
endif ()

if (ENABLE_D AND ENABLE_DATA_DUMP)
    list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc")
endif ()
|
||||
|
||||
if (ENABLE_DUMP_E2E)
|
||||
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc")
|
||||
endif (ENABLE_DUMP_E2E)
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "debug/common.h"
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include "utils/system/env.h"
|
||||
#include "utils/system/file_system.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
std::optional<std::string> Common::GetRealPath(const std::string &input_path) {
|
||||
std::string out_path;
|
||||
auto path_split_pos = input_path.find_last_of('/');
|
||||
if (path_split_pos == std::string::npos) {
|
||||
path_split_pos = input_path.find_last_of('\\');
|
||||
}
|
||||
// get real path
|
||||
char real_path[PATH_MAX] = {0};
|
||||
if (path_split_pos != std::string::npos) {
|
||||
std::string prefix_path = input_path.substr(0, path_split_pos);
|
||||
if (prefix_path.length() >= PATH_MAX) {
|
||||
MS_LOG(ERROR) << "Prefix path is too longer!";
|
||||
return std::nullopt;
|
||||
}
|
||||
std::string last_path = input_path.substr(path_split_pos, input_path.length() - path_split_pos);
|
||||
auto ret = CreateNotExistDirs(prefix_path);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "CreateNotExistDirs Failed!";
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (nullptr == realpath(prefix_path.c_str(), real_path)) {
|
||||
MS_LOG(ERROR) << "dir " << prefix_path << " does not exit.";
|
||||
return std::nullopt;
|
||||
}
|
||||
out_path = std::string(real_path) + last_path;
|
||||
}
|
||||
|
||||
if (path_split_pos == std::string::npos) {
|
||||
if (input_path.length() >= PATH_MAX) {
|
||||
MS_LOG(ERROR) << "Prefix path is too longer!";
|
||||
return std::nullopt;
|
||||
}
|
||||
if (nullptr == realpath(input_path.c_str(), real_path)) {
|
||||
MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created.";
|
||||
}
|
||||
out_path = std::string(real_path);
|
||||
}
|
||||
return out_path;
|
||||
}
|
||||
|
||||
bool Common::CreateNotExistDirs(const std::string &path) {
|
||||
std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
|
||||
MS_EXCEPTION_IF_NULL(fs);
|
||||
char temp_path[PATH_MAX] = {0};
|
||||
if (path.length() > PATH_MAX) {
|
||||
MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX;
|
||||
return false;
|
||||
}
|
||||
for (uint32_t i = 0; i < path.length(); i++) {
|
||||
temp_path[i] = path[i];
|
||||
if (temp_path[i] == '\\' || temp_path[i] == '/') {
|
||||
if (i != 0) {
|
||||
char tmp_char = temp_path[i];
|
||||
temp_path[i] = '\0';
|
||||
std::string path_handle(temp_path);
|
||||
if (!fs->FileExist(temp_path)) {
|
||||
MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating...";
|
||||
if (!fs->CreateDir(temp_path)) {
|
||||
MS_LOG(ERROR) << "Create " << path_handle << " dir error";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
temp_path[i] = tmp_char;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!fs->FileExist(path)) {
|
||||
MS_LOG(INFO) << "Dir " << path << " does not exit, creating...";
|
||||
if (!fs->CreateDir(path)) {
|
||||
MS_LOG(ERROR) << "Create " << path << " dir error";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<std::string> Common::GetConfigFile(const std::string &env) {
|
||||
if (env.empty()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid env";
|
||||
}
|
||||
auto config_path_str = std::getenv(env.c_str());
|
||||
if (config_path_str == nullptr) {
|
||||
MS_LOG(ERROR) << "Please export env:" << env;
|
||||
return {};
|
||||
}
|
||||
MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << config_path_str;
|
||||
|
||||
std::string dump_config_file(config_path_str);
|
||||
std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
|
||||
MS_EXCEPTION_IF_NULL(fs);
|
||||
if (!fs->FileExist(dump_config_file)) {
|
||||
MS_LOG(ERROR) << dump_config_file << " not exist.";
|
||||
return {};
|
||||
}
|
||||
return dump_config_file;
|
||||
}
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,36 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
|
||||
|
||||
#include <string>
|
||||
#include <optional>
|
||||
#include "utils/contract.h"
|
||||
|
||||
namespace mindspore {
|
||||
// Static helpers shared by the debug/dump code for locating config files and
// preparing output paths. The class holds no state.
class Common {
 public:
  Common() = default;
  ~Common() = default;

  // Returns the file path stored in environment variable `env`, or nullopt if
  // the variable is unset or the file does not exist.
  static std::optional<std::string> GetConfigFile(const std::string &env);

  // Resolves `input_path`, creating its parent directories when needed;
  // nullopt on failure.
  static std::optional<std::string> GetRealPath(const std::string &input_path);

 private:
  // Creates `path` and any missing ancestor directories.
  static bool CreateNotExistDirs(const std::string &path);
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
|
|
@ -0,0 +1,152 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "debug/data_dump_parser.h"
|
||||
|
||||
#include <fstream>
|
||||
#include "utils/context/ms_context.h"
|
||||
#include "debug/common.h"
|
||||
|
||||
constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
|
||||
constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
|
||||
constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
|
||||
namespace mindspore {
|
||||
// Restores every parsed setting to its default so a fresh parse starts clean.
void DataDumpParser::ResetParam() {
  // Scalar settings.
  enable_ = false;
  dump_mode_ = 0;
  dump_step_ = 0;
  // Container settings.
  net_name_.clear();
  kernel_set_.clear();
}
|
||||
|
||||
bool DataDumpParser::DumpEnabled() const {
|
||||
auto enable_dump = std::getenv(kEnableDataDump);
|
||||
if (!enable_dump) {
|
||||
MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto enabled = std::atoi(enable_dump);
|
||||
if (enabled != 1) {
|
||||
MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
if (context->execution_mode() == kPynativeMode) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<std::string> DataDumpParser::GetDumpPath() const {
|
||||
auto dump_path = std::getenv(kDataDumpPath);
|
||||
if (!dump_path) {
|
||||
MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH";
|
||||
return {};
|
||||
}
|
||||
std::string dump_path_str(dump_path);
|
||||
return dump_path_str;
|
||||
}
|
||||
|
||||
void DataDumpParser::ParseDumpConfig() {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
MS_LOG(INFO) << "[DataDump] parse start";
|
||||
if (!DumpEnabled()) {
|
||||
MS_LOG(INFO) << "[DataDump] dump not enable";
|
||||
return;
|
||||
}
|
||||
|
||||
ResetParam();
|
||||
|
||||
auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah);
|
||||
if (!dump_config_file.has_value()) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] Get config file failed";
|
||||
}
|
||||
|
||||
std::ifstream json_file(dump_config_file.value());
|
||||
if (!json_file.is_open()) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed.";
|
||||
}
|
||||
|
||||
nlohmann::json j;
|
||||
json_file >> j;
|
||||
if (j.find("DumpSettings") == j.end()) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist.";
|
||||
}
|
||||
|
||||
nlohmann::json dump_settings = j.at("DumpSettings");
|
||||
// convert json to string
|
||||
std::stringstream ss;
|
||||
ss << dump_settings;
|
||||
std::string cfg = ss.str();
|
||||
MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg;
|
||||
if (!IsConfigExist(dump_settings)) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid";
|
||||
}
|
||||
|
||||
if (!ParseDumpSetting(dump_settings)) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed";
|
||||
}
|
||||
}
|
||||
|
||||
bool DataDumpParser::NeedDump(const std::string &op_full_name) const {
|
||||
if (!DumpEnabled()) {
|
||||
return false;
|
||||
}
|
||||
if (dump_mode_ == 0) {
|
||||
return true;
|
||||
}
|
||||
auto iter = kernel_set_.find(op_full_name);
|
||||
return iter != kernel_set_.end();
|
||||
}
|
||||
|
||||
// Checks that the DumpSettings object carries all four required keys
// ("mode", "net_name", "iteration", "kernels"); logs an error and returns
// false as soon as one is missing.
bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {
  static const char *const kRequiredKeys[] = {"mode", "net_name", "iteration", "kernels"};
  for (const char *key : kRequiredKeys) {
    if (dump_settings.find(key) == dump_settings.end()) {
      MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist.";
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Validates the value types inside DumpSettings and stores them in the parser.
//
// Expects "mode" and "iteration" to be numbers, "net_name" a string and
// "kernels" an array of strings. On any type mismatch an error is logged,
// enable_ is cleared and false is returned without touching the other fields.
// On success enable_ is set and the mode, net name, iteration and kernel names
// are recorded. Assumes the four keys exist (checked by IsConfigExist).
bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
  const auto &mode = dump_settings.at("mode");
  const auto &net_name = dump_settings.at("net_name");
  const auto &iteration = dump_settings.at("iteration");
  const auto &kernels = dump_settings.at("kernels");

  bool types_valid = mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array();
  if (types_valid) {
    // Every kernel entry must be a string: op names are matched verbatim against
    // full scope names, so numbers/objects can never be meaningful here.
    for (const auto &kernel : kernels) {
      if (!kernel.is_string()) {
        types_valid = false;
        break;
      }
    }
  }
  if (!types_valid) {
    MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid.";
    enable_ = false;
    return false;
  }

  enable_ = true;
  dump_mode_ = mode.get<uint32_t>();
  net_name_ = net_name.get<std::string>();
  dump_step_ = iteration.get<uint32_t>();
  for (const auto &kernel : kernels) {
    // get<std::string>() returns the decoded string value; serialising with
    // dump() and stripping quotes would mangle names containing escapes or '"'.
    const auto kernel_str = kernel.get<std::string>();
    MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str;
    kernel_set_.insert(kernel_str);
  }
  return true;
}
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,61 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
class DataDumpParser {
|
||||
public:
|
||||
static DataDumpParser &GetInstance() {
|
||||
static DataDumpParser instance;
|
||||
return instance;
|
||||
}
|
||||
void ParseDumpConfig();
|
||||
bool NeedDump(const std::string &op_full_name) const;
|
||||
bool DumpEnabled() const;
|
||||
std::optional<std::string> GetDumpPath() const;
|
||||
bool enable() const { return enable_; }
|
||||
const std::string &net_name() const { return net_name_; }
|
||||
uint32_t dump_mode() const { return dump_mode_; }
|
||||
uint32_t dump_step() const { return dump_step_; }
|
||||
const std::set<std::string> &kernel_set() const { return kernel_set_; }
|
||||
|
||||
private:
|
||||
DataDumpParser() = default;
|
||||
virtual ~DataDumpParser() = default;
|
||||
DISABLE_COPY_AND_ASSIGN(DataDumpParser);
|
||||
|
||||
void ResetParam();
|
||||
bool IsConfigExist(const nlohmann::json &dump_settings) const;
|
||||
bool ParseDumpSetting(const nlohmann::json &dump_settings);
|
||||
|
||||
std::mutex lock_;
|
||||
bool enable_{false};
|
||||
std::string net_name_;
|
||||
uint32_t dump_mode_{0};
|
||||
uint32_t dump_step_{0};
|
||||
std::set<std::string> kernel_set_;
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
|
|
@ -17,12 +17,14 @@
|
|||
#include <limits.h>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <optional>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/system/file_system.h"
|
||||
#include "utils/system/env.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
#include "debug/common.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
|
@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len)
|
|||
return false;
|
||||
}
|
||||
|
||||
std::string realpath;
|
||||
bool ret = GetRealPath(filename, &realpath);
|
||||
if (!ret) {
|
||||
auto realpath = Common::GetRealPath(filename);
|
||||
if (!realpath.has_value()) {
|
||||
MS_LOG(ERROR) << "Get real path failed.";
|
||||
return false;
|
||||
}
|
||||
std::ofstream fd;
|
||||
fd.open(realpath, std::ios::binary | std::ios::out);
|
||||
fd.open(realpath.value(), std::ios::binary | std::ios::out);
|
||||
if (!fd.is_open()) {
|
||||
MS_LOG(ERROR) << "Open file " << realpath << " fail.";
|
||||
MS_LOG(ERROR) << "Open file " << realpath.value() << " fail.";
|
||||
return false;
|
||||
}
|
||||
(void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
|
||||
fd.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) {
|
||||
MS_EXCEPTION_IF_NULL(outpath);
|
||||
auto path_split_pos = inpath.find_last_of('/');
|
||||
if (path_split_pos == std::string::npos) {
|
||||
path_split_pos = inpath.find_last_of('\\');
|
||||
}
|
||||
// get real path
|
||||
char real_path[PATH_MAX] = {0};
|
||||
if (path_split_pos != std::string::npos) {
|
||||
std::string prefix_path = inpath.substr(0, path_split_pos);
|
||||
if (prefix_path.length() >= PATH_MAX) {
|
||||
MS_LOG(ERROR) << "Prefix path is too longer!";
|
||||
return false;
|
||||
}
|
||||
std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos);
|
||||
auto ret = CreateNotExistDirs(prefix_path);
|
||||
if (ret == false) {
|
||||
MS_LOG(ERROR) << "CreateNotExistDirs Failed!";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (nullptr == realpath(prefix_path.c_str(), real_path)) {
|
||||
MS_LOG(ERROR) << "dir " << prefix_path << " does not exit.";
|
||||
return false;
|
||||
}
|
||||
*outpath = std::string(real_path) + last_path;
|
||||
}
|
||||
|
||||
if (path_split_pos == std::string::npos) {
|
||||
if (inpath.length() >= PATH_MAX) {
|
||||
MS_LOG(ERROR) << "Prefix path is too longer!";
|
||||
return false;
|
||||
}
|
||||
if (nullptr == realpath(inpath.c_str(), real_path)) {
|
||||
MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created.";
|
||||
}
|
||||
*outpath = std::string(real_path);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Dump::CreateNotExistDirs(const std::string &path) {
|
||||
std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
|
||||
MS_EXCEPTION_IF_NULL(fs);
|
||||
char temp_path[PATH_MAX] = {0};
|
||||
if (path.length() > PATH_MAX) {
|
||||
MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX;
|
||||
return false;
|
||||
}
|
||||
for (uint32_t i = 0; i < path.length(); i++) {
|
||||
temp_path[i] = path[i];
|
||||
if (temp_path[i] == '\\' || temp_path[i] == '/') {
|
||||
if (i != 0) {
|
||||
char tmp_char = temp_path[i];
|
||||
temp_path[i] = '\0';
|
||||
std::string path_handle(temp_path);
|
||||
if (!fs->FileExist(temp_path)) {
|
||||
MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating...";
|
||||
if (!fs->CreateDir(temp_path)) {
|
||||
MS_LOG(ERROR) << "Create " << path_handle << " dir error";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
temp_path[i] = tmp_char;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!fs->FileExist(path)) {
|
||||
MS_LOG(INFO) << "Dir " << path << " does not exit, creating...";
|
||||
if (!fs->CreateDir(path)) {
|
||||
MS_LOG(ERROR) << "Create " << path << " dir error";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -59,10 +59,6 @@ class Dump {
|
|||
uint32_t cur_iter_;
|
||||
std::vector<std::string> dump_kernels_;
|
||||
|
||||
static bool GetRealPath(const std::string &inpath, std::string *outpath);
|
||||
|
||||
static bool CreateNotExistDirs(const std::string &path);
|
||||
|
||||
private:
|
||||
bool ParseDumpConfig(const std::string &dump_config_file);
|
||||
bool IsConfigExist(const nlohmann::json &dumpSettings);
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include "device/ascend/ascend_memory_manager.h"
|
||||
#include "debug/tensor_load.h"
|
||||
|
||||
using ge::model_runner::ModelRunner;
|
||||
using mindspore::device::ascend::ProfilingManager;
|
||||
using mindspore::device::ascend::ProfilingUtils;
|
||||
using mindspore::device::ascend::tasksink::TaskGenerator;
|
||||
|
@ -90,9 +91,16 @@ std::string GetRankId() {
|
|||
AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); }
|
||||
|
||||
void AscendKernelRuntime::ClearGraphModelMap() {
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
for (auto &iter : graph_data_dumper_) {
|
||||
MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first;
|
||||
iter.second->UnloadDumpInfo();
|
||||
}
|
||||
graph_data_dumper_.clear();
|
||||
#endif
|
||||
for (auto &iter : graph_model_map_) {
|
||||
MS_LOG(INFO) << "Ge UnloadModel " << iter.first;
|
||||
auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first);
|
||||
auto ret = ModelRunner::Instance().UnloadModel(iter.first);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "UnloadModel failed";
|
||||
}
|
||||
|
@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
|
|||
return;
|
||||
}
|
||||
MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first;
|
||||
auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first);
|
||||
auto ret = ModelRunner::Instance().UnloadModel(iter->first);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "UnloadModel failed";
|
||||
}
|
||||
|
@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() {
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
DataDumpParser::GetInstance().ParseDumpConfig();
|
||||
#endif
|
||||
|
||||
// Start up profiling before rtSetDevice
|
||||
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
|
||||
if (!ret) {
|
||||
|
@ -440,7 +452,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
|
|||
<< ", wait_active_stream_list size:" << wait_active_stream_list.size()
|
||||
<< ", force_copy_stream_list size:" << force_copy_stream_list.size();
|
||||
std::vector<std::shared_ptr<ge::model_runner::OpInfo>> empty_list;
|
||||
std::shared_ptr<ge::model_runner::DavinciModel> model = std::make_shared<ge::model_runner::DavinciModel>(
|
||||
auto model = std::make_shared<ge::model_runner::DavinciModel>(
|
||||
task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0,
|
||||
0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)),
|
||||
resource_manager.get_cur_event_num(), 0);
|
||||
|
@ -477,21 +489,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
|
|||
|
||||
std::shared_ptr<ge::ModelListener> listener;
|
||||
MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first;
|
||||
bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first,
|
||||
model_iter->second, listener);
|
||||
bool status =
|
||||
ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener);
|
||||
if (!status) {
|
||||
MS_LOG(EXCEPTION) << "Load Task Failed";
|
||||
}
|
||||
if (ProfilingManager::GetInstance().IsProfiling()) {
|
||||
auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first);
|
||||
auto stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first);
|
||||
auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
|
||||
auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
|
||||
ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph));
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
LaunchDataDump(NOT_NULL(graph));
|
||||
#endif
|
||||
if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) {
|
||||
MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
// Builds a DataDumper for `graph`, loads its dump info, and remembers it in
// graph_data_dumper_ keyed by graph id. No-op when data dump is disabled.
void AscendKernelRuntime::LaunchDataDump(NotNull<const session::KernelGraph *> graph) {
  if (DataDumpParser::GetInstance().DumpEnabled()) {
    auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id());
    auto dumper = std::make_shared<DataDumper>(graph.get(), runtime_info_map);
    MS_EXCEPTION_IF_NULL(dumper);
    dumper->LoadDumpInfo();

    // try_emplace leaves an existing entry for this graph id untouched.
    const bool inserted = graph_data_dumper_.try_emplace(graph->graph_id(), dumper).second;
    if (!inserted) {
      MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
    }
  }
}
|
||||
#endif
|
||||
|
||||
void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
|
||||
auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id);
|
||||
auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id);
|
||||
auto graph_task_names = ProfilingUtils::graph_kernel_name();
|
||||
auto iter = graph_task_names.find(graph_id);
|
||||
if (iter != graph_task_names.end()) {
|
||||
|
@ -524,7 +560,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors);
|
||||
bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors);
|
||||
if (!status) {
|
||||
MS_LOG(ERROR) << "Run task failed";
|
||||
DebugTaskIdName(graph->graph_id());
|
||||
|
|
|
@ -24,6 +24,10 @@
|
|||
#include "framework/ge_runtime/davinci_model.h"
|
||||
#include "device/kernel_runtime_manager.h"
|
||||
#include "session/session_basic.h"
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
#include "debug/data_dump_parser.h"
|
||||
#include "device/ascend/dump/data_dumper.h"
|
||||
#endif
|
||||
|
||||
using ge::model_runner::TaskInfo;
|
||||
using std::unordered_map;
|
||||
|
@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime {
|
|||
bool initialized_{false};
|
||||
unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_;
|
||||
unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_;
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
void LaunchDataDump(NotNull<const session::KernelGraph *> graph);
|
||||
unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_;
|
||||
#endif
|
||||
};
|
||||
|
||||
MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime);
|
||||
|
|
|
@ -0,0 +1,282 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
#include "device/ascend/dump/data_dumper.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "utility"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "runtime/mem.h"
|
||||
#include "runtime/kernel.h"
|
||||
#include "device/ascend/dump/ge_dump.h"
|
||||
#include "proto/op_mapping_info.pb.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
#include "debug/data_dump_parser.h"
|
||||
|
||||
constexpr uint32_t kAicpuLoadFlag = 1;
|
||||
constexpr uint32_t kAicpuUnloadFlag = 0;
|
||||
constexpr uint32_t kTupleTaskId = 0;
|
||||
constexpr uint32_t kTupleStreamId = 1;
|
||||
constexpr uint32_t kTupleArgs = 2;
|
||||
constexpr uint32_t kCurrentStepTensorIndex = 0;
|
||||
constexpr uint32_t kCurrentEpochTensorIndex = 1;
|
||||
constexpr uint32_t kStepsPerEpochTensorIndex = 2;
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
|
||||
void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
|
||||
void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr);
|
||||
|
||||
// Releases the device buffers held for the load and unload dump messages,
// load buffer first.
DataDumper::~DataDumper() {
  for (void **dev_mem : {&dev_load_mem_, &dev_unload_mem_}) {
    ReleaseDevMem(dev_mem);
  }
}
|
||||
|
||||
void DataDumper::LoadDumpInfo() {
|
||||
MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_);
|
||||
aicpu::dump::OpMappingInfo dump_info;
|
||||
SetOpMappingInfo(NOT_NULL(&dump_info));
|
||||
|
||||
auto kernels = kernel_graph_->execution_order();
|
||||
for (const auto &kernel : kernels) {
|
||||
MS_EXCEPTION_IF_NULL(kernel);
|
||||
if (!KernelNeedDump(kernel)) {
|
||||
continue;
|
||||
}
|
||||
MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->fullname_with_scope();
|
||||
dump_kernel_names_.emplace_back(kernel->fullname_with_scope());
|
||||
|
||||
aicpu::dump::Task task;
|
||||
ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
|
||||
MS_EXCEPTION_IF_NULL(dump_info.mutable_task());
|
||||
dump_info.mutable_task()->Add(std::move(task));
|
||||
}
|
||||
RtLoadDumpData(dump_info, &dev_load_mem_);
|
||||
load_flag_ = true;
|
||||
MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
|
||||
}
|
||||
|
||||
// Fills the graph-level fields of the dump descriptor: dump path (suffixed with the device id),
// model name/id, dump step and the "load" flag. When the graph carries the loop-control
// tensors (data sink mode), their device addresses are also recorded.
//
// Raises (via MS_LOG(EXCEPTION) / MS_EXCEPTION_IF_NULL) when the context, the kernel graph,
// the configured dump path, or any of the control tensors / their device addresses is missing.
void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  auto dump_path = DataDumpParser::GetInstance().GetDumpPath();
  if (!dump_path.has_value()) {
    MS_LOG(EXCEPTION) << "Dump path invalid";
  }
  auto device_id = context_ptr->device_id();
  dump_info->set_dump_path(dump_path.value() + "_" + std::to_string(device_id) + "/");
  MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value();

  const auto graph_id = kernel_graph_->graph_id();
  dump_info->set_model_name(DataDumpParser::GetInstance().net_name() + "_" + std::to_string(graph_id));
  dump_info->set_dump_step(std::to_string(DataDumpParser::GetInstance().dump_step()));
  dump_info->set_model_id(graph_id);
  dump_info->set_flag(kAicpuLoadFlag);

  // The control tensors are indexed by kCurrentStepTensorIndex, kCurrentEpochTensorIndex and
  // kStepsPerEpochTensorIndex, so the container must hold more entries than the highest index.
  const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors();
  if (input_ctrl_tensors == nullptr || input_ctrl_tensors->size() <= kStepsPerEpochTensorIndex) {
    MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor";
    return;
  }
  const auto &current_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex);
  const auto &current_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex);
  const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex);

  MS_EXCEPTION_IF_NULL(current_step_tensor);
  MS_EXCEPTION_IF_NULL(current_epoch_tensor);
  MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor);
  MS_EXCEPTION_IF_NULL(current_step_tensor->device_address());
  MS_EXCEPTION_IF_NULL(current_epoch_tensor->device_address());
  MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address());

  void *current_step = current_step_tensor->device_address()->ptr_;
  void *current_epoch = current_epoch_tensor->device_address()->ptr_;
  void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_;

  if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) {
    // NOTE(review): step_id_addr receives the epoch tensor address and loop_cond_addr the
    // step tensor address -- confirm this mapping against the AICPU dump contract.
    dump_info->set_step_id_addr(reinterpret_cast<uint64_t>(current_epoch));
    dump_info->set_loop_cond_addr(reinterpret_cast<uint64_t>(current_step));
    dump_info->set_iterations_per_loop_addr(reinterpret_cast<uint64_t>(steps_per_epoch));
  } else {
    MS_LOG(INFO) << "Invalid ctrl tensor device address";
  }
}
|
||||
|
||||
bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const {
|
||||
if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL &&
|
||||
AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) {
|
||||
return false;
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(kernel);
|
||||
const auto &kernel_set = DataDumpParser::GetInstance().kernel_set();
|
||||
return kernel_set.find(kernel->fullname_with_scope()) != kernel_set.end();
|
||||
}
|
||||
|
||||
void DataDumper::UnloadDumpInfo() {
|
||||
if (!load_flag_) {
|
||||
MS_LOG(WARNING) << "Load not success, no need to unload";
|
||||
return;
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_);
|
||||
MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << kernel_graph_->graph_id();
|
||||
|
||||
aicpu::dump::OpMappingInfo op_mapping_info;
|
||||
op_mapping_info.set_model_id(kernel_graph_->graph_id());
|
||||
op_mapping_info.set_flag(kAicpuUnloadFlag);
|
||||
|
||||
for (const auto &kernel_name : dump_kernel_names_) {
|
||||
aicpu::dump::Task task;
|
||||
auto iter = runtime_info_map_.find(kernel_name);
|
||||
if (iter == runtime_info_map_.end()) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(iter->second);
|
||||
auto task_id = std::get<kTupleTaskId>(*iter->second);
|
||||
task.set_task_id(task_id);
|
||||
MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task());
|
||||
op_mapping_info.mutable_task()->Add(std::move(task));
|
||||
}
|
||||
|
||||
RtLoadDumpData(op_mapping_info, &dev_unload_mem_);
|
||||
}
|
||||
|
||||
void DataDumper::ReleaseDevMem(void **ptr) const {
|
||||
if (ptr == nullptr) {
|
||||
return;
|
||||
}
|
||||
if (*ptr != nullptr) {
|
||||
rtError_t rt_error = rtFree(*ptr);
|
||||
if (rt_error != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error;
|
||||
}
|
||||
*ptr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Populates `dump_task` for one kernel: task id and stream id taken from the runtime info
// recorded under the kernel's full scoped name, the op name/type, and one entry per kernel
// output and input. Raises when the kernel has no runtime info entry.
void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const {
  dump_task->set_end_graph(false);

  const auto entry = runtime_info_map_.find(kernel->fullname_with_scope());
  if (entry == runtime_info_map_.end()) {
    MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
  }
  MS_EXCEPTION_IF_NULL(entry->second);

  // RuntimeInfo layout: (task_id, stream_id, args).
  const auto &[task_id, stream_id, args] = *entry->second;
  MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id;

  dump_task->set_task_id(task_id);
  dump_task->set_stream_id(stream_id);

  auto *op = dump_task->mutable_op();
  MS_EXCEPTION_IF_NULL(op);
  op->set_op_name(kernel->fullname_with_scope());
  op->set_op_type(AnfAlgo::GetCNodeName(kernel.get()));

  // Outputs are described before inputs, matching the order the descriptor was originally built in.
  DumpKernelOutput(kernel, args, dump_task);
  DumpKernelInput(kernel, args, dump_task);
}
|
||||
|
||||
// Serializes `dump_info`, copies it into freshly allocated device memory and hands it to the
// runtime through rtDatadumpInfoLoad.
//
// `ptr` is an out-parameter: on return `*ptr` holds the device buffer, which the caller owns
// and must release. A null `ptr` is reported and ignored; every runtime failure raises through
// MS_LOG(EXCEPTION).
void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) {
  // Reject a missing output slot before it is passed to rtMalloc.
  if (ptr == nullptr) {
    MS_LOG(ERROR) << "[DataDump] Output device pointer is nullptr";
    return;
  }

  std::string proto_str;
  size_t proto_size = dump_info.ByteSizeLong();
  bool ret = dump_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    // The logger is stream based, so the size has to be streamed rather than printf-formatted.
    MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size " << proto_size << ".";
  }

  rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed";
  }
  // Guard against a "successful" allocation that still left the slot empty.
  if (*ptr == nullptr) {
    MS_LOG(EXCEPTION) << "[DataDump] rtMalloc failed, ptr is nullptr";
  }

  rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed";
  }

  MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start";
  rt_ret = rtDatadumpInfoLoad(*ptr, proto_size);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed";
  }
}
|
||||
|
||||
void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
|
||||
MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope();
|
||||
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
|
||||
auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
|
||||
uint64_t offset = sizeof(void *) * input_size;
|
||||
for (size_t i = 0; i < output_size; ++i) {
|
||||
auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
|
||||
auto output_format = AnfAlgo::GetOutputFormat(kernel, i);
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i);
|
||||
|
||||
aicpu::dump::Output output;
|
||||
output.set_data_type(GetGeDataType(data_type));
|
||||
output.set_format(GetGeFormat(output_format, output_shape.size()));
|
||||
MS_EXCEPTION_IF_NULL(output.mutable_shape());
|
||||
for (auto dim : output_shape) {
|
||||
output.mutable_shape()->add_dim(dim);
|
||||
}
|
||||
output.set_original_output_format(GetGeFormat(output_format, output_shape.size()));
|
||||
output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
|
||||
MS_EXCEPTION_IF_NULL(task->mutable_output());
|
||||
task->mutable_output()->Add(std::move(output));
|
||||
offset += sizeof(void *);
|
||||
}
|
||||
}
|
||||
|
||||
void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
|
||||
MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope();
|
||||
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
|
||||
uint64_t offset = 0;
|
||||
for (size_t i = 0; i < input_size; ++i) {
|
||||
aicpu::dump::Input input;
|
||||
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
|
||||
auto input_node = input_node_with_index.first;
|
||||
auto input_index = input_node_with_index.second;
|
||||
std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index);
|
||||
auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index);
|
||||
if (output_type == kTypeUnknown) {
|
||||
MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph";
|
||||
output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index);
|
||||
}
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index);
|
||||
|
||||
input.set_data_type(GetGeDataType(output_type));
|
||||
input.set_format(GetGeFormat(output_format, output_shape.size()));
|
||||
MS_EXCEPTION_IF_NULL(input.mutable_shape());
|
||||
for (auto dim : output_shape) {
|
||||
input.mutable_shape()->add_dim(dim);
|
||||
}
|
||||
input.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
|
||||
MS_EXCEPTION_IF_NULL(task->mutable_input());
|
||||
task->mutable_input()->Add(std::move(input));
|
||||
offset += sizeof(void *);
|
||||
}
|
||||
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif
|
|
@ -0,0 +1,69 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
#include <tuple>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "session/kernel_graph.h"
|
||||
|
||||
namespace aicpu {
|
||||
namespace dump {
|
||||
class OpMappingInfo;
|
||||
class Task;
|
||||
} // namespace dump
|
||||
} // namespace aicpu
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// tuple(task_id, stream_id, args); the op name is the key of the runtime info map
|
||||
using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;
|
||||
class DataDumper {
|
||||
public:
|
||||
DataDumper(const session::KernelGraph *kernel_graph,
|
||||
const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info_map)
|
||||
: load_flag_(false),
|
||||
dev_load_mem_(nullptr),
|
||||
dev_unload_mem_(nullptr),
|
||||
kernel_graph_(kernel_graph),
|
||||
runtime_info_map_(runtime_info_map) {}
|
||||
~DataDumper();
|
||||
void LoadDumpInfo();
|
||||
|
||||
void UnloadDumpInfo();
|
||||
|
||||
private:
|
||||
void ReleaseDevMem(void **ptr) const;
|
||||
bool KernelNeedDump(const CNodePtr &kernel) const;
|
||||
void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
|
||||
void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const;
|
||||
|
||||
bool load_flag_;
|
||||
void *dev_load_mem_;
|
||||
void *dev_unload_mem_;
|
||||
std::vector<std::string> dump_kernel_names_;
|
||||
const session::KernelGraph *kernel_graph_;
|
||||
std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map_;
|
||||
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
|
|
@ -0,0 +1,120 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "proto/ge_dtype.pb.h"
|
||||
#include "ir/dtype/type_id.h"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// Translates a MindSpore TypeId into the matching ge::proto::DataType.
// Logs the incoming type id and raises (MS_LOG(EXCEPTION)) for ids without a mapping.
static ge::proto::DataType GetGeDataType(TypeId type_id) {
  static const std::map<TypeId, ge::proto::DataType> kTypeIdToGeType = {
    {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED},
    {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT},
    {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16},
    {TypeId::kNumberTypeInt8, ge::proto::DT_INT8},
    {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8},
    {TypeId::kNumberTypeInt16, ge::proto::DT_INT16},
    {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16},
    {TypeId::kNumberTypeInt32, ge::proto::DT_INT32},
    {TypeId::kNumberTypeInt64, ge::proto::DT_INT64},
    {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32},
    {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64},
    {TypeId::kNumberTypeBool, ge::proto::DT_BOOL},
    {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE},
  };
  MS_LOG(INFO) << "Vm origin type_id:" << type_id;
  const auto found = kTypeIdToGeType.find(type_id);
  if (found == kTypeIdToGeType.end()) {
    MS_LOG(EXCEPTION) << "Invalid data type:" << type_id;
  }
  return found->second;
}
|
||||
|
||||
// Tensor layout identifiers passed to the dump descriptor. Every enumerator carries its
// numeric value explicitly so the numbering cannot shift when entries are added or moved.
enum GeFormat {
  kFormat_NCHW = 0,                              // NCHW
  kFormat_NHWC = 1,                              // NHWC
  kFormat_ND = 2,                                // Nd Tensor
  kFormat_NC1HWC0 = 3,                           // NC1HWC0
  kFormat_FRACTAL_Z = 4,                         // FRACTAL_Z
  kFormat_NC1C0HWPAD = 5,
  kFormat_NHWC1C0 = 6,
  kFormat_FSR_NCHW = 7,
  kFormat_FRACTAL_DECONV = 8,
  kFormat_C1HWNC0 = 9,
  kFormat_FRACTAL_DECONV_TRANSPOSE = 10,
  kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS = 11,
  kFormat_NC1HWC0_C04 = 12,                      // NC1HWC0, C0 = 4
  kFormat_FRACTAL_Z_C04 = 13,                    // FRACZ, C0 = 4
  kFormat_CHWN = 14,
  kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15,
  kFormat_HWCN = 16,
  kFormat_NC1KHKWHWC0 = 17,                      // KH, KW: kernel h & kernel w, maxpooling max output format
  kFormat_BN_WEIGHT = 18,
  kFormat_FILTER_HWCK = 19,                      // filter input tensor format
  kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20,
  kFormat_HASHTABLE_LOOKUP_KEYS = 21,
  kFormat_HASHTABLE_LOOKUP_VALUE = 22,
  kFormat_HASHTABLE_LOOKUP_OUTPUT = 23,
  kFormat_HASHTABLE_LOOKUP_HITS = 24,
  kFormat_C1HWNCoC0 = 25,
  kFormat_MD = 26,
  kFormat_NDHWC = 27,
  kFormat_FRACTAL_ZZ = 28,
  kFormat_FRACTAL_NZ = 29,
  kFormat_NCDHW = 30,
  kFormat_DHWCN = 31,                            // 3D filter input tensor format
  kFormat_NDC1HWC0 = 32,
  kFormat_FRACTAL_Z_3D = 33,
  kFormat_CN = 34,
  kFormat_NC = 35,
  kFormat_DHWNC = 36,
  kFormat_FRACTAL_Z_3D_TRANSPOSE = 37,           // 3D filter (transpose) input tensor format
  kFormat_RESERVED = 38,
  kFormat_ALL = 39
};
|
||||
|
||||
// Translates a MindSpore op format string into a GeFormat value.
// The default format is resolved from the tensor rank: 4-D tensors are reported as NCHW,
// anything else as ND. Raises (MS_LOG(EXCEPTION)) for format strings without a mapping.
static GeFormat GetGeFormat(const std::string &format, size_t shape_size) {
  static const std::map<std::string, GeFormat> kFormatToGeFormat = {
    // The default format never reaches this lookup (handled by the rank check below); the
    // entry is kept so the table still lists every known format.
    {kOpFormat_DEFAULT, kFormat_NCHW},
    {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0},
    {kOpFormat_ND, kFormat_ND},
    {kOpFormat_NCHW, kFormat_NCHW},
    {kOpFormat_NHWC, kFormat_NHWC},
    {kOpFormat_HWCN, kFormat_HWCN},
    {kOpFormat_NC1HWC0, kFormat_NC1HWC0},
    {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z},
    {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ},
    {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0},
    {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04},
    {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04},
    {kOpFormat_NDHWC, kFormat_NDHWC},
  };
  MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size;
  if (format == kOpFormat_DEFAULT) {
    if (shape_size == 4) {
      return kFormat_NCHW;
    }
    return kFormat_ND;
  }
  const auto found = kFormatToGeFormat.find(format);
  if (found == kFormatToGeFormat.end()) {
    MS_LOG(EXCEPTION) << "Invalid format:" << format;
  }
  return found->second;
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
|
|
@ -0,0 +1,49 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package ge.proto;
|
||||
|
||||
enum DataType
|
||||
{
|
||||
DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set.
|
||||
DT_FLOAT = 1; // float type
|
||||
DT_FLOAT16 = 2; // fp16 type
|
||||
DT_INT8 = 3; // int8 type
|
||||
DT_UINT8 = 4; // uint8 type
|
||||
DT_INT16 = 5; // int16 type
|
||||
DT_UINT16 = 6; // uint16 type
|
||||
DT_INT32 = 7; // int32 type
|
||||
DT_INT64 = 8; // int64 type
|
||||
DT_UINT32 = 9; // unsigned int32
|
||||
DT_UINT64 = 10; // unsigned int64
|
||||
DT_BOOL = 11; // bool type
|
||||
DT_DOUBLE = 12; // double type
|
||||
DT_STRING = 13; // string type
|
||||
DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */
|
||||
DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */
|
||||
DT_COMPLEX64 = 16; // complex64 type
|
||||
DT_COMPLEX128 = 17; // complex128 type
|
||||
DT_QINT8 = 18; // qint8 type
|
||||
DT_QINT16 = 19; // qint16 type
|
||||
DT_QINT32 = 20; // qint32 type
|
||||
DT_QUINT8 = 21; // quint8 type
|
||||
DT_QUINT16 = 22; // quint16 type
|
||||
DT_RESOURCE = 23; // resource type
|
||||
DT_STRING_REF = 24; // string_ref type
|
||||
DT_DUAL = 25; /**< dual output type */
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
syntax = "proto3";
|
||||
package aicpu.dump;
|
||||
|
||||
message Shape {
|
||||
repeated uint64 dim = 1;
|
||||
}
|
||||
|
||||
message Output {
|
||||
int32 data_type = 1;
|
||||
int32 format = 2;
|
||||
Shape shape = 3;
|
||||
uint64 address = 4;
|
||||
string original_name = 5;
|
||||
int32 original_output_index = 6;
|
||||
int32 original_output_data_type = 7;
|
||||
int32 original_output_format = 8;
|
||||
uint64 size = 9;
|
||||
};
|
||||
|
||||
message Input {
|
||||
int32 data_type = 1;
|
||||
int32 format = 2;
|
||||
Shape shape = 3;
|
||||
uint64 address = 4;
|
||||
uint64 size = 5;
|
||||
}
|
||||
|
||||
message Op {
|
||||
string op_name = 1;
|
||||
string op_type = 2;
|
||||
};
|
||||
|
||||
message Task {
|
||||
uint32 task_id = 1;
|
||||
uint32 stream_id = 2;
|
||||
Op op = 3;
|
||||
repeated Output output = 4;
|
||||
bool end_graph = 5;
|
||||
repeated Input input = 6;
|
||||
};
|
||||
|
||||
message OpMappingInfo {
|
||||
string dump_path = 1;
|
||||
oneof model_name_param {
|
||||
string model_name = 2;
|
||||
}
|
||||
oneof model_id_param {
|
||||
uint32 model_id = 3;
|
||||
}
|
||||
oneof step_id {
|
||||
uint64 step_id_addr = 4;
|
||||
}
|
||||
oneof iterations_per_loop {
|
||||
uint64 iterations_per_loop_addr = 5;
|
||||
}
|
||||
oneof loop_cond {
|
||||
uint64 loop_cond_addr = 6;
|
||||
}
|
||||
uint32 flag = 7; // 0x01 load, 0x00 unload
|
||||
repeated Task task = 8;
|
||||
string dump_step = 9;
|
||||
};
|
|
@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
|
|||
AddressPtrList kernel_outputs;
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope());
|
||||
if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) {
|
||||
for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) {
|
||||
auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i);
|
||||
|
|
|
@ -34,6 +34,7 @@ class CPUKernelRuntime;
|
|||
namespace ascend {
|
||||
class AscendKernelRuntime;
|
||||
class AscendMemoryManager;
|
||||
class DataDumper;
|
||||
namespace tasksink {
|
||||
class TaskGenerator;
|
||||
} // namespace tasksink
|
||||
|
@ -90,6 +91,7 @@ class DeviceAddress {
|
|||
friend class mindspore::device::gpu::GPUMemoryManager;
|
||||
friend class mindspore::device::ascend::AscendKernelRuntime;
|
||||
friend class mindspore::device::ascend::AscendMemoryManager;
|
||||
friend class mindspore::device::ascend::DataDumper;
|
||||
};
|
||||
|
||||
using DeviceAddressPtr = std::shared_ptr<DeviceAddress>;
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "device/ascend/kernel_select_ascend.h"
|
||||
#include "runtime/base.h"
|
||||
#include "device/ascend/ascend_stream_assign.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
using device::ascend::ProfilingUtils;
|
||||
|
@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
|
|||
std::vector<AnfNodePtr> *mute_inputs = kernel_graph_ptr->MutableInputs();
|
||||
MS_EXCEPTION_IF_NULL(mute_inputs);
|
||||
mute_inputs->push_back(switch_loop_input[kLoopCountParamName]);
|
||||
mute_inputs->push_back(switch_loop_input[kEpochParamName]);
|
||||
mute_inputs->push_back(switch_loop_input[kIterLoopParamName]);
|
||||
mute_inputs->push_back(switch_loop_input[kZeroParamName]);
|
||||
mute_inputs->push_back(switch_loop_input[kOneParamName]);
|
||||
|
@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptr<session::Kerne
|
|||
one->set_abstract(paremeter_abstract_ptr);
|
||||
ParameterPtr one_new = kernel_graph_ptr->NewParameter(one);
|
||||
(*switch_loop_input)[kOneParamName] = one_new;
|
||||
|
||||
ParameterPtr epoch = std::make_shared<Parameter>(kernel_graph_ptr);
|
||||
MS_EXCEPTION_IF_NULL(epoch);
|
||||
epoch->set_name(kEpochParamName);
|
||||
epoch->set_abstract(paremeter_abstract_ptr);
|
||||
ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch);
|
||||
(*switch_loop_input)[kEpochParamName] = epoch_new;
|
||||
}
|
||||
|
||||
kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBuilder(
|
||||
|
@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
|
|||
*val = 0;
|
||||
inputs->push_back(loop_count_tensor);
|
||||
|
||||
// Epoch in device
|
||||
tensor::TensorPtr epoch_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp);
|
||||
MS_EXCEPTION_IF_NULL(epoch_tensor);
|
||||
val = static_cast<int32_t *>(epoch_tensor->data_c());
|
||||
MS_EXCEPTION_IF_NULL(val);
|
||||
*val = 0;
|
||||
inputs->push_back(epoch_tensor);
|
||||
|
||||
tensor::TensorPtr iter_loop_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp);
|
||||
MS_EXCEPTION_IF_NULL(iter_loop_tensor);
|
||||
val = static_cast<int32_t *>(iter_loop_tensor->data_c());
|
||||
|
@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
|
|||
MS_EXCEPTION_IF_NULL(val);
|
||||
*val = 1;
|
||||
inputs->push_back(one_tensor);
|
||||
|
||||
MS_LOG(INFO) << "---------------- LoadSwitchInputs End--";
|
||||
}
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count";
|
|||
constexpr auto kIterLoopParamName = "iter_loop";
|
||||
constexpr auto kZeroParamName = "zero";
|
||||
constexpr auto kOneParamName = "one";
|
||||
constexpr auto kEpochParamName = "loop_epoch";
|
||||
constexpr auto kStreamNeedActivedFirst = "stream_need_active_first";
|
||||
constexpr uint32_t kSecondStreamSwitchLabel = 2;
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "kernel/aicpu/aicpu_kernel_build.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "kernel/aicpu/aicpu_util.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
||||
using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
|
||||
|
||||
|
@ -144,8 +145,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
|
|||
if (node_name_ == kTopK) {
|
||||
node_name_ = kTopKV2;
|
||||
}
|
||||
|
||||
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
|
||||
stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs);
|
||||
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
|
||||
|
||||
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
|
||||
return {task_info_ptr};
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "runtime/rt.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -123,8 +124,8 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in
|
|||
MS_LOG(DEBUG) << "The block_dim is:" << block_dim;
|
||||
|
||||
TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
|
||||
stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs,
|
||||
output_data_addrs, workspace_addrs);
|
||||
kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data,
|
||||
input_data_addrs, output_data_addrs, workspace_addrs, NeedDump());
|
||||
return {task_info_ptr};
|
||||
}
|
||||
} // namespace kernel
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
#include <memory>
|
||||
#include "framework/ge_runtime/task_info.h"
|
||||
#include "kernel/kernel.h"
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
#include "debug/data_dump_parser.h"
|
||||
#endif
|
||||
|
||||
using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
|
||||
namespace mindspore {
|
||||
|
@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod {
|
|||
const std::vector<AddressPtr> &, uint32_t) = 0;
|
||||
uint32_t block_dim() { return block_dim_; }
|
||||
uint32_t stream_id() { return stream_id_; }
|
||||
virtual bool NeedDump() {
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
return DataDumpParser::GetInstance().NeedDump(kernel_name_);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
uint32_t block_dim_{1};
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "device/ascend/tasksink/runtime_utils.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "utils/utils.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
||||
using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>;
|
||||
using ge::model_runner::HcclTaskInfo;
|
||||
|
@ -146,10 +147,12 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu
|
|||
<< ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_)
|
||||
<< ", data_type=" << static_cast<int>(data_type);
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
HcclTaskInfoPtr task_info_ptr = std::make_shared<HcclTaskInfo>(
|
||||
stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr,
|
||||
hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel,
|
||||
RuntimeUtils::HcomDistribute);
|
||||
kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0,
|
||||
private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel,
|
||||
RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump());
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
|
|
@ -129,6 +129,10 @@ class KernelMod {
|
|||
virtual std::vector<size_t> GenParameters() { return {}; }
|
||||
|
||||
virtual ~KernelMod() = default;
|
||||
void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; }
|
||||
|
||||
protected:
|
||||
std::string kernel_name_;
|
||||
};
|
||||
using KernelModPtr = std::shared_ptr<KernelMod>;
|
||||
} // namespace kernel
|
||||
|
|
|
@ -58,8 +58,9 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in
|
|||
}
|
||||
stream_id_ = stream_id;
|
||||
|
||||
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>(
|
||||
stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE);
|
||||
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
|
||||
std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr,
|
||||
inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false);
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
|
|
@ -55,7 +55,8 @@ std::vector<TaskInfoPtr> LabelGotoKernel::GenTask(const std::vector<AddressPtr>
|
|||
const std::vector<AddressPtr> &, uint32_t stream_id) {
|
||||
MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id;
|
||||
std::vector<TaskInfoPtr> task_info_list;
|
||||
std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = std::make_shared<LabelGotoTaskInfo>(stream_id, label_);
|
||||
std::shared_ptr<LabelGotoTaskInfo> task_info_ptr =
|
||||
std::make_shared<LabelGotoTaskInfo>(kernel_name_, stream_id, label_);
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
task_info_list.emplace_back(task_info_ptr);
|
||||
return task_info_list;
|
||||
|
|
|
@ -55,7 +55,7 @@ std::vector<TaskInfoPtr> LabelSetKernel::GenTask(const std::vector<AddressPtr> &
|
|||
const std::vector<AddressPtr> &, uint32_t stream_id) {
|
||||
MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id;
|
||||
std::vector<TaskInfoPtr> task_info_list;
|
||||
std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(stream_id, label_);
|
||||
std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(kernel_name_, stream_id, label_);
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
task_info_list.emplace_back(task_info_ptr);
|
||||
return task_info_list;
|
||||
|
|
|
@ -67,7 +67,7 @@ std::vector<TaskInfoPtr> LabelSwitchKernel::GenTask(const std::vector<AddressPtr
|
|||
MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id;
|
||||
std::vector<TaskInfoPtr> task_info_list;
|
||||
cond_ = inputs[0]->addr;
|
||||
auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(stream_id, label_size_, label_list_, cond_);
|
||||
auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(kernel_name_, stream_id, label_size_, label_list_, cond_);
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
task_info_list.emplace_back(task_info_ptr);
|
||||
return task_info_list;
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "common/utils.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "common/trans.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
||||
using ge::model_runner::MemcpyAsyncTaskInfo;
|
||||
using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>;
|
||||
|
@ -118,8 +119,9 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr
|
|||
}
|
||||
|
||||
stream_id_ = stream_id;
|
||||
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>(
|
||||
stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE);
|
||||
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
|
||||
std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr,
|
||||
inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
|
|
@ -63,7 +63,7 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt
|
|||
<< ", outputs size:" << outputs.size();
|
||||
stream_id_ = stream_id;
|
||||
std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr =
|
||||
std::make_shared<ProfilerTraceTaskInfo>(stream_id, log_id_, notify_, flags_);
|
||||
std::make_shared<ProfilerTraceTaskInfo>(kernel_name_, stream_id, log_id_, notify_, flags_);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
} // namespace kernel
|
||||
|
|
|
@ -60,7 +60,7 @@ std::vector<TaskInfoPtr> RecvKernel::GenTask(const std::vector<AddressPtr> &, co
|
|||
const std::vector<AddressPtr> &, uint32_t stream_id) {
|
||||
MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id;
|
||||
stream_id_ = stream_id;
|
||||
EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(stream_id, event_id_);
|
||||
EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(kernel_name_, stream_id, event_id_);
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ std::vector<TaskInfoPtr> SendKernel::GenTask(const std::vector<AddressPtr> &, co
|
|||
const std::vector<AddressPtr> &, uint32_t stream_id) {
|
||||
MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id;
|
||||
stream_id_ = stream_id;
|
||||
EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(stream_id, event_id_);
|
||||
EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(kernel_name_, stream_id, event_id_);
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
|
|
@ -72,7 +72,8 @@ std::vector<TaskInfoPtr> StreamActiveKernel::GenTask(const std::vector<AddressPt
|
|||
stream_id_ = stream_id;
|
||||
std::vector<TaskInfoPtr> task_info_list;
|
||||
for (auto &index : active_streams_index_) {
|
||||
std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = std::make_shared<StreamActiveTaskInfo>(stream_id, index);
|
||||
std::shared_ptr<StreamActiveTaskInfo> task_info_ptr =
|
||||
std::make_shared<StreamActiveTaskInfo>(kernel_name_, stream_id, index);
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
task_info_list.emplace_back(task_info_ptr);
|
||||
MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index;
|
||||
|
|
|
@ -91,8 +91,8 @@ std::vector<TaskInfoPtr> StreamSwitchKernel::GenTask(const std::vector<AddressPt
|
|||
auto ites_per_loop = inputs[1]->addr;
|
||||
MS_LOG(INFO) << "cond_:" << static_cast<int>(cond_) << ", true_stream_index_:" << true_stream_index_
|
||||
<< ", stream_id:" << stream_id;
|
||||
std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr =
|
||||
std::make_shared<StreamSwitchTaskInfo>(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_);
|
||||
std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = std::make_shared<StreamSwitchTaskInfo>(
|
||||
kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_);
|
||||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
#include "kernel/tbe/tbe_kernel_mod.h"
|
||||
#include <algorithm>
|
||||
#include "runtime/rt.h"
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "utils/context/ms_context.h"
|
||||
#include "graphengine/inc/framework/ge_runtime/task_info.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -99,9 +99,9 @@ std::vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &in
|
|||
|
||||
MS_LOG(INFO) << "block_dim is:" << block_dim_;
|
||||
|
||||
TbeTaskInfoPtr task_info_ptr =
|
||||
make_shared<ge::model_runner::TbeTaskInfo>(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0,
|
||||
meta_data, input_data_addrs, output_data_addrs, workspace_addrs);
|
||||
TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
|
||||
kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs,
|
||||
output_data_addrs, workspace_addrs, NeedDump());
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ namespace session {
|
|||
using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>;
|
||||
class KernelGraph : public FuncGraph {
|
||||
public:
|
||||
KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) {
|
||||
KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) {
|
||||
inputs_ = std::make_shared<std::vector<AnfNodePtr>>();
|
||||
execution_order_ = {};
|
||||
executable_ = true;
|
||||
|
@ -154,6 +154,8 @@ class KernelGraph : public FuncGraph {
|
|||
AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const;
|
||||
void AddFinalOutputKernel(const AnfNodePtr &node);
|
||||
bool IsFinalOutputKernel(const AnfNodePtr &node) const;
|
||||
uint32_t current_epoch() const { return current_epoch_; }
|
||||
void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; }
|
||||
|
||||
private:
|
||||
// remove value node form graph
|
||||
|
@ -216,6 +218,7 @@ class KernelGraph : public FuncGraph {
|
|||
std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
|
||||
std::unordered_map<AnfNodePtr, AnfNodePtr> internal_outputs_to_front_map_;
|
||||
std::set<AnfNodePtr> final_output_kernels_;
|
||||
uint32_t current_epoch_;
|
||||
};
|
||||
} // namespace session
|
||||
using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
|
||||
|
|
|
@ -187,6 +187,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr<KernelGraph> &graph, std::vecto
|
|||
// set loop_count to zero
|
||||
MS_EXCEPTION_IF_NULL(inputs);
|
||||
inputs->push_back(tensor);
|
||||
|
||||
auto epoch_tensor = (*inputs_params)[1];
|
||||
MS_EXCEPTION_IF_NULL(epoch_tensor);
|
||||
auto *epoch_val = static_cast<int32_t *>(epoch_tensor->data_c());
|
||||
MS_EXCEPTION_IF_NULL(epoch_val);
|
||||
*epoch_val = graph->current_epoch();
|
||||
epoch_tensor->set_dirty(true);
|
||||
inputs->push_back(epoch_tensor);
|
||||
MS_LOG(INFO) << "Load epoch_val:" << *epoch_val;
|
||||
|
||||
graph->set_current_epoch(graph->current_epoch() + 1);
|
||||
|
||||
return inputs_params->size();
|
||||
}
|
||||
|
||||
|
@ -814,13 +826,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector<AnfNodePtr> ¶
|
|||
void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||
const std::vector<tensor::TensorPtr> &inputs_const) const {
|
||||
std::vector<tensor::TensorPtr> inputs(inputs_const);
|
||||
size_t input_ctrl_size = 1;
|
||||
size_t input_ctrl_size = 2;
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
if (kernel_graph->input_ctrl_tensors()) {
|
||||
input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs);
|
||||
}
|
||||
auto input_nodes = kernel_graph->inputs();
|
||||
if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) {
|
||||
if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) {
|
||||
MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size()
|
||||
<< ", input_ctrl_size:" << input_ctrl_size;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint
|
|||
|
||||
bool ModelRunner::UnloadModel(uint32_t model_id) { return true; }
|
||||
|
||||
bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; }
|
||||
|
||||
bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) {
|
||||
return true;
|
||||
}
|
||||
|
@ -45,6 +47,11 @@ const std::vector<uint32_t> &ModelRunner::GetStreamIdList(uint32_t model_id) con
|
|||
static std::vector<uint32_t> stream_id_list;
|
||||
return stream_id_list;
|
||||
}
|
||||
|
||||
const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const {
|
||||
static std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map;
|
||||
return runtime_info_map;
|
||||
}
|
||||
} // namespace model_runner
|
||||
} // namespace ge
|
||||
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
#include "device/ascend/ascend_stream_assign.h"
|
||||
#include "device/ascend/ascend_label_assign.h"
|
||||
#include "device/ascend/tasksink/task_generator.h"
|
||||
#include "device/kernel_adjust.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
|
|||
void AscendStreamAssign::GetWaitStreams(vector<uint32_t> *wait_active_stream_list) { return; }
|
||||
|
||||
void AscendStreamAssign::GetHcomStreams(std::vector<uint32_t> *streams) { return; }
|
||||
|
||||
namespace tasksink {
|
||||
bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list,
|
||||
uint32_t graph_id) {
|
||||
return true;
|
||||
}
|
||||
} // namespace tasksink
|
||||
} // namespace ascend
|
||||
void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; }
|
||||
bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return true; }
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device/ascend/tasksink/task_generator.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
namespace tasksink {
|
||||
bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list,
|
||||
uint32_t graph_id) {
|
||||
return true;
|
||||
}
|
||||
} // namespace tasksink
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
Loading…
Reference in New Issue