forked from mindspore-Ecosystem/mindspore
refactor callback for ge backend
This commit is contained in:
parent
7a367af9c6
commit
3202fc0df9
|
@ -87,7 +87,22 @@ ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" GENERATED_OUTPUT_DIR
|
||||||
file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
"ir/*.cc"
|
"ir/*.cc"
|
||||||
"ir/dtype/*.cc"
|
"ir/dtype/*.cc"
|
||||||
"utils/*.cc"
|
"utils/context/ms_context.cc"
|
||||||
|
"utils/symbolic.cc"
|
||||||
|
"utils/tensorprint_utils.cc"
|
||||||
|
"utils/convert_utils.cc"
|
||||||
|
"utils/graph_utils.cc"
|
||||||
|
"utils/misc.cc"
|
||||||
|
"utils/callbacks.cc"
|
||||||
|
"utils/profile.cc"
|
||||||
|
"utils/base_ref.cc"
|
||||||
|
"utils/summary/event_writer.cc"
|
||||||
|
"utils/log_adapter.cc"
|
||||||
|
"utils/comm_manager.cc"
|
||||||
|
"utils/any.cc"
|
||||||
|
"utils/config_manager.cc"
|
||||||
|
"utils/system/file_system.cc"
|
||||||
|
"utils/system/crc32c.cc"
|
||||||
"common/*.cc"
|
"common/*.cc"
|
||||||
"parallel/*.cc"
|
"parallel/*.cc"
|
||||||
"pipeline/pipeline.cc"
|
"pipeline/pipeline.cc"
|
||||||
|
@ -173,6 +188,7 @@ if(ENABLE_GE)
|
||||||
file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
"transform/*.cc"
|
"transform/*.cc"
|
||||||
"pynative/pynative_execute_ge.cc"
|
"pynative/pynative_execute_ge.cc"
|
||||||
|
"utils/callbacks_ge.cc"
|
||||||
"pipeline/pipeline_ge.cc"
|
"pipeline/pipeline_ge.cc"
|
||||||
)
|
)
|
||||||
list(APPEND MINDSPORE_SRC_LIST ${GE_SRC_LIST})
|
list(APPEND MINDSPORE_SRC_LIST ${GE_SRC_LIST})
|
||||||
|
|
|
@ -616,7 +616,6 @@ py::object ExecutorPy::Run(const py::tuple& args, const py::object& phase) {
|
||||||
return ExecDFGraph(info_, args, phase_s);
|
return ExecDFGraph(info_, args, phase_s);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
MS_LOG(WARNING) << "In ut test " << size << phase_s;
|
|
||||||
if (backend == "ge") {
|
if (backend == "ge") {
|
||||||
std::shared_ptr<py::object> ret_val = std::make_shared<py::object>();
|
std::shared_ptr<py::object> ret_val = std::make_shared<py::object>();
|
||||||
if (info_.count(phase_s) != 0 && info_[phase_s]->func_graph != nullptr) {
|
if (info_.count(phase_s) != 0 && info_[phase_s]->func_graph != nullptr) {
|
||||||
|
|
|
@ -24,6 +24,9 @@
|
||||||
#include "utils/callbacks.h"
|
#include "utils/callbacks.h"
|
||||||
#include "utils/utils.h"
|
#include "utils/utils.h"
|
||||||
#include "./common.h"
|
#include "./common.h"
|
||||||
|
#ifdef ENABLE_GE
|
||||||
|
#include "utils/callbacks_ge.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef NO_GE_CLIENT
|
#ifdef NO_GE_CLIENT
|
||||||
namespace ge {
|
namespace ge {
|
||||||
|
|
|
@ -20,10 +20,6 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "pybind11/pybind11.h"
|
#include "pybind11/pybind11.h"
|
||||||
#ifdef ENABLE_GE
|
|
||||||
#include "transform/df_graph_manager.h"
|
|
||||||
#include "transform/util.h"
|
|
||||||
#endif
|
|
||||||
#include "pipeline/parse/data_converter.h"
|
#include "pipeline/parse/data_converter.h"
|
||||||
#include "pipeline/parse/python_adapter.h"
|
#include "pipeline/parse/python_adapter.h"
|
||||||
#include "utils/visible.h"
|
#include "utils/visible.h"
|
||||||
|
@ -38,155 +34,6 @@ const char kSummary[] = "Summary";
|
||||||
const char kCheckPoint[] = "Save";
|
const char kCheckPoint[] = "Save";
|
||||||
const int ONE_SHAPE = 1;
|
const int ONE_SHAPE = 1;
|
||||||
|
|
||||||
#ifdef ENABLE_GE
|
|
||||||
using mindspore::transform::Status;
|
|
||||||
using mindspore::transform::TransformUtil;
|
|
||||||
|
|
||||||
bool GetParameterShape(const FuncGraphPtr& graph, const std::string& param_name,
|
|
||||||
const std::shared_ptr<std::vector<int>>& shape) {
|
|
||||||
if (graph == nullptr) {
|
|
||||||
MS_LOG(ERROR) << "Graph is null, can not get graph parameter";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto parameter_nodes = graph->parameters();
|
|
||||||
for (auto& node : parameter_nodes) {
|
|
||||||
ParameterPtr param_node = std::static_pointer_cast<Parameter>(node);
|
|
||||||
if (param_node == nullptr) {
|
|
||||||
MS_LOG(ERROR) << "Parameter node is null, can not get graph parameter";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (param_node->name() == param_name) {
|
|
||||||
py::object parameter = param_node->default_param();
|
|
||||||
ValuePtr value = parse::data_converter::PyDataToValue(parameter);
|
|
||||||
TensorPtr tensor = std::dynamic_pointer_cast<tensor::Tensor>(value);
|
|
||||||
if (tensor == nullptr) {
|
|
||||||
shape->push_back(ONE_SHAPE);
|
|
||||||
} else {
|
|
||||||
*shape = tensor->shape();
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
MS_LOG(ERROR) << "Can not find parameter of name:" << param_name;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static TensorPtr GetMeTensorTransformed(uint32_t graph_id, const std::string& parameter_name,
|
|
||||||
const std::shared_ptr<ge::Tensor>& ge_tensor_ptr) {
|
|
||||||
FuncGraphPtr anf_graph = transform::DfGraphManager::GetInstance().GetAnfGraph(graph_id);
|
|
||||||
if (anf_graph == nullptr) {
|
|
||||||
MS_LOG(ERROR) << "Get anf graph failed during callback";
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::shared_ptr<std::vector<int>> parameter_shape_ptr = std::make_shared<std::vector<int>>();
|
|
||||||
if (!GetParameterShape(anf_graph, parameter_name, parameter_shape_ptr)) {
|
|
||||||
MS_LOG(ERROR) << "Can not get parameter shape during callback";
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
return TransformUtil::ConvertGeTensor(ge_tensor_ptr, *parameter_shape_ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t CheckpointSaveCallback(uint32_t graph_id, const std::map<std::string, ge::Tensor>& params_list) {
|
|
||||||
// Acquire GIL before calling Python code
|
|
||||||
py::gil_scoped_acquire acquire;
|
|
||||||
|
|
||||||
MS_LOG(DEBUG) << "Start the checkpoint save callback function in checkpoint save process.";
|
|
||||||
py::list parameter_list = py::list();
|
|
||||||
for (auto& item : params_list) {
|
|
||||||
std::string name = item.first;
|
|
||||||
std::shared_ptr<ge::Tensor> ge_tensor_ptr = std::make_shared<ge::Tensor>(item.second);
|
|
||||||
TensorPtr tensor_ptr = GetMeTensorTransformed(graph_id, name, ge_tensor_ptr);
|
|
||||||
if (tensor_ptr == nullptr) {
|
|
||||||
MS_LOG(EXCEPTION) << "Transform ge tensor to me tensor failed";
|
|
||||||
}
|
|
||||||
py::dict param_dict;
|
|
||||||
param_dict["name"] = name;
|
|
||||||
param_dict["data"] = tensor_ptr;
|
|
||||||
parameter_list.append(param_dict);
|
|
||||||
}
|
|
||||||
py::bool_ ret =
|
|
||||||
parse::python_adapter::CallPyFn(PYTHON_MOD_CALLBACK_MODULE, PYTHON_FUN_PROCESS_CHECKPOINT, parameter_list);
|
|
||||||
auto bool_ret = py::cast<bool>(ret);
|
|
||||||
|
|
||||||
uint32_t status = Status::SUCCESS;
|
|
||||||
if (!bool_ret) {
|
|
||||||
status = Status::FAILED;
|
|
||||||
MS_LOG(ERROR) << "python checkpoint return false during callback";
|
|
||||||
}
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
static TensorPtr GetMeTensorForSummary(const std::string& name, const std::shared_ptr<ge::Tensor>& ge_tensor_ptr) {
|
|
||||||
// confirm the type by name
|
|
||||||
// Format: xxx[:Scalar] xxx[:Image] xxx[:Tensor]
|
|
||||||
if (name.empty()) {
|
|
||||||
MS_LOG(EXCEPTION) << "The summary name is empty.";
|
|
||||||
}
|
|
||||||
auto bpos = name.rfind("[:");
|
|
||||||
if (bpos >= name.size()) {
|
|
||||||
MS_LOG(EXCEPTION) << "The summary name(" << name << ") is invalid.";
|
|
||||||
}
|
|
||||||
auto tname = name.substr(bpos);
|
|
||||||
if (tname == "[:Scalar]") {
|
|
||||||
MS_LOG(DEBUG) << "The summary(" << name << ") is Scalar";
|
|
||||||
// process the scalar type summary
|
|
||||||
// Because the ge tensor is dim = 4, so set the (1,1,1,1)-->(1,)
|
|
||||||
// We do the (1,) shape is scalar
|
|
||||||
auto shape = std::vector<int>({ONE_SHAPE});
|
|
||||||
return TransformUtil::ConvertGeTensor(ge_tensor_ptr, shape);
|
|
||||||
}
|
|
||||||
if (tname == "[:Tensor]") {
|
|
||||||
MS_LOG(DEBUG) << "The summary(" << name << ") is Tensor";
|
|
||||||
// process the tensor summary
|
|
||||||
// Now we can't get the real shape, so we keep same shape with GE
|
|
||||||
return TransformUtil::ConvertGeTensor(ge_tensor_ptr);
|
|
||||||
}
|
|
||||||
if (tname == "[:Image]") {
|
|
||||||
MS_LOG(DEBUG) << "The summary(" << name << ") is Image";
|
|
||||||
// process the Image summary
|
|
||||||
// Image dim = 4, is same with ge, so we keep same shape with GE
|
|
||||||
return TransformUtil::ConvertGeTensor(ge_tensor_ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
MS_LOG(EXCEPTION) << "The summary name(" << name << ") is invalid.";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cache the summary callback data
|
|
||||||
// Output Format: [{"name": tag_name, "data": tensor}, {"name": tag_name, "data": tensor},...]
|
|
||||||
uint32_t MS_EXPORT SummarySaveCallback(uint32_t graph_id, const std::map<std::string, ge::Tensor>& params_list) {
|
|
||||||
// Acquire GIL before calling Python code
|
|
||||||
py::gil_scoped_acquire acquire;
|
|
||||||
|
|
||||||
MS_LOG(DEBUG) << "Start the summary save callback function for graph " << graph_id << ".";
|
|
||||||
py::list summary_list = py::list();
|
|
||||||
MS_LOG(DEBUG) << "Param list size = " << params_list.size();
|
|
||||||
for (auto& item : params_list) {
|
|
||||||
std::string tag_name = item.first;
|
|
||||||
std::shared_ptr<ge::Tensor> ge_tensor_ptr = std::make_shared<ge::Tensor>(item.second);
|
|
||||||
TensorPtr tensor_ptr = GetMeTensorForSummary(tag_name, ge_tensor_ptr);
|
|
||||||
if (tensor_ptr == nullptr) {
|
|
||||||
MS_LOG(EXCEPTION) << "ConvertGeTensor return tensor is null";
|
|
||||||
}
|
|
||||||
py::dict summary_value_dict;
|
|
||||||
summary_value_dict["name"] = tag_name;
|
|
||||||
summary_value_dict["data"] = tensor_ptr;
|
|
||||||
summary_list.append(summary_value_dict);
|
|
||||||
}
|
|
||||||
|
|
||||||
py::bool_ ret = parse::python_adapter::CallPyFn(PYTHON_MOD_CALLBACK_MODULE, PYTHON_FUN_PROCESS_SUMMARY, summary_list);
|
|
||||||
auto bool_ret = py::cast<bool>(ret);
|
|
||||||
if (!bool_ret) {
|
|
||||||
MS_LOG(ERROR) << "Python checkpoint return false during callback";
|
|
||||||
return Status::FAILED;
|
|
||||||
}
|
|
||||||
MS_LOG(DEBUG) << "End the summary save callback function.";
|
|
||||||
return Status::SUCCESS;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Cache the summary callback data from ME session
|
// Cache the summary callback data from ME session
|
||||||
// Remove the GE module on new architecture
|
// Remove the GE module on new architecture
|
||||||
// Output Format: [{"name": tag_name, "data": tensor}, {"name": tag_name, "data": tensor},...]
|
// Output Format: [{"name": tag_name, "data": tensor}, {"name": tag_name, "data": tensor},...]
|
||||||
|
|
|
@ -21,10 +21,6 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include "ir/meta_tensor.h"
|
#include "ir/meta_tensor.h"
|
||||||
#ifdef ENABLE_GE
|
|
||||||
#include "transform/types.h"
|
|
||||||
#include "transform/util.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace callbacks {
|
namespace callbacks {
|
||||||
|
@ -45,10 +41,6 @@ const int kCallbackFalied = 1;
|
||||||
|
|
||||||
bool GetParameterShape(const FuncGraphPtr& anf_graph, const std::string& param_name,
|
bool GetParameterShape(const FuncGraphPtr& anf_graph, const std::string& param_name,
|
||||||
const std::shared_ptr<std::vector<int>>& shape);
|
const std::shared_ptr<std::vector<int>>& shape);
|
||||||
#ifdef ENABLE_GE
|
|
||||||
uint32_t CheckpointSaveCallback(uint32_t, const std::map<std::string, ge::Tensor>&);
|
|
||||||
uint32_t SummarySaveCallback(uint32_t, const std::map<std::string, ge::Tensor>&);
|
|
||||||
#endif
|
|
||||||
uint32_t SummarySaveCallback(uint32_t, const std::map<std::string, TensorPtr>&);
|
uint32_t SummarySaveCallback(uint32_t, const std::map<std::string, TensorPtr>&);
|
||||||
|
|
||||||
} // namespace callbacks
|
} // namespace callbacks
|
||||||
|
|
|
@ -0,0 +1,182 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "utils/callbacks_ge.h"
|
||||||
|
#include "pybind11/pybind11.h"
|
||||||
|
#include "transform/df_graph_manager.h"
|
||||||
|
#include "transform/util.h"
|
||||||
|
#include "pipeline/parse/data_converter.h"
|
||||||
|
#include "pipeline/parse/python_adapter.h"
|
||||||
|
#include "utils/visible.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace callbacks {
|
||||||
|
|
||||||
|
const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback";
|
||||||
|
const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op";
|
||||||
|
const char PYTHON_FUN_PROCESS_SUMMARY[] = "_summary_cb_for_save_op";
|
||||||
|
const char kSummary[] = "Summary";
|
||||||
|
const char kCheckPoint[] = "Save";
|
||||||
|
const int ONE_SHAPE = 1;
|
||||||
|
|
||||||
|
using mindspore::transform::Status;
|
||||||
|
using mindspore::transform::TransformUtil;
|
||||||
|
|
||||||
|
bool GetParameterShape(const FuncGraphPtr& graph, const std::string& param_name,
|
||||||
|
const std::shared_ptr<std::vector<int>>& shape) {
|
||||||
|
if (graph == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "Graph is null, can not get graph parameter";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto parameter_nodes = graph->parameters();
|
||||||
|
for (auto& node : parameter_nodes) {
|
||||||
|
ParameterPtr param_node = std::static_pointer_cast<Parameter>(node);
|
||||||
|
if (param_node == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "Parameter node is null, can not get graph parameter";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (param_node->name() == param_name) {
|
||||||
|
py::object parameter = param_node->default_param();
|
||||||
|
ValuePtr value = parse::data_converter::PyDataToValue(parameter);
|
||||||
|
TensorPtr tensor = std::dynamic_pointer_cast<tensor::Tensor>(value);
|
||||||
|
if (tensor == nullptr) {
|
||||||
|
shape->push_back(ONE_SHAPE);
|
||||||
|
} else {
|
||||||
|
*shape = tensor->shape();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MS_LOG(ERROR) << "Can not find parameter of name:" << param_name;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static TensorPtr GetMeTensorTransformed(uint32_t graph_id, const std::string& parameter_name,
|
||||||
|
const std::shared_ptr<ge::Tensor>& ge_tensor_ptr) {
|
||||||
|
FuncGraphPtr anf_graph = transform::DfGraphManager::GetInstance().GetAnfGraph(graph_id);
|
||||||
|
if (anf_graph == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "Get anf graph failed during callback";
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<std::vector<int>> parameter_shape_ptr = std::make_shared<std::vector<int>>();
|
||||||
|
if (!GetParameterShape(anf_graph, parameter_name, parameter_shape_ptr)) {
|
||||||
|
MS_LOG(ERROR) << "Can not get parameter shape during callback";
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TransformUtil::ConvertGeTensor(ge_tensor_ptr, *parameter_shape_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t CheckpointSaveCallback(uint32_t graph_id, const std::map<std::string, ge::Tensor>& params_list) {
|
||||||
|
// Acquire GIL before calling Python code
|
||||||
|
py::gil_scoped_acquire acquire;
|
||||||
|
|
||||||
|
MS_LOG(DEBUG) << "Start the checkpoint save callback function in checkpoint save process.";
|
||||||
|
py::list parameter_list = py::list();
|
||||||
|
for (auto& item : params_list) {
|
||||||
|
std::string name = item.first;
|
||||||
|
std::shared_ptr<ge::Tensor> ge_tensor_ptr = std::make_shared<ge::Tensor>(item.second);
|
||||||
|
TensorPtr tensor_ptr = GetMeTensorTransformed(graph_id, name, ge_tensor_ptr);
|
||||||
|
if (tensor_ptr == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Transform ge tensor to me tensor failed";
|
||||||
|
}
|
||||||
|
py::dict param_dict;
|
||||||
|
param_dict["name"] = name;
|
||||||
|
param_dict["data"] = tensor_ptr;
|
||||||
|
parameter_list.append(param_dict);
|
||||||
|
}
|
||||||
|
py::bool_ ret =
|
||||||
|
parse::python_adapter::CallPyFn(PYTHON_MOD_CALLBACK_MODULE, PYTHON_FUN_PROCESS_CHECKPOINT, parameter_list);
|
||||||
|
auto bool_ret = py::cast<bool>(ret);
|
||||||
|
|
||||||
|
uint32_t status = Status::SUCCESS;
|
||||||
|
if (!bool_ret) {
|
||||||
|
status = Status::FAILED;
|
||||||
|
MS_LOG(ERROR) << "Python checkpoint return false during callback";
|
||||||
|
}
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
static TensorPtr GetMeTensorForSummary(const std::string& name, const std::shared_ptr<ge::Tensor>& ge_tensor_ptr) {
|
||||||
|
// confirm the type by name
|
||||||
|
// Format: xxx[:Scalar] xxx[:Image] xxx[:Tensor]
|
||||||
|
if (name.empty()) {
|
||||||
|
MS_LOG(EXCEPTION) << "The summary name is empty.";
|
||||||
|
}
|
||||||
|
auto bpos = name.rfind("[:");
|
||||||
|
if (bpos >= name.size()) {
|
||||||
|
MS_LOG(EXCEPTION) << "The summary name(" << name << ") is invalid.";
|
||||||
|
}
|
||||||
|
auto tname = name.substr(bpos);
|
||||||
|
if (tname == "[:Scalar]") {
|
||||||
|
MS_LOG(DEBUG) << "The summary(" << name << ") is Scalar";
|
||||||
|
// process the scalar type summary
|
||||||
|
// Because the ge tensor is dim = 4, so set the (1,1,1,1)-->(1,)
|
||||||
|
// We do the (1,) shape is scalar
|
||||||
|
auto shape = std::vector<int>({ONE_SHAPE});
|
||||||
|
return TransformUtil::ConvertGeTensor(ge_tensor_ptr, shape);
|
||||||
|
}
|
||||||
|
if (tname == "[:Tensor]") {
|
||||||
|
MS_LOG(DEBUG) << "The summary(" << name << ") is Tensor";
|
||||||
|
// process the tensor summary
|
||||||
|
// Now we can't get the real shape, so we keep same shape with GE
|
||||||
|
return TransformUtil::ConvertGeTensor(ge_tensor_ptr);
|
||||||
|
}
|
||||||
|
if (tname == "[:Image]") {
|
||||||
|
MS_LOG(DEBUG) << "The summary(" << name << ") is Image";
|
||||||
|
// process the Image summary
|
||||||
|
// Image dim = 4, is same with ge, so we keep same shape with GE
|
||||||
|
return TransformUtil::ConvertGeTensor(ge_tensor_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
MS_LOG(EXCEPTION) << "The summary name(" << name << ") is invalid.";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache the summary callback data
|
||||||
|
// Output Format: [{"name": tag_name, "data": tensor}, {"name": tag_name, "data": tensor},...]
|
||||||
|
uint32_t MS_EXPORT SummarySaveCallback(uint32_t graph_id, const std::map<std::string, ge::Tensor>& params_list) {
|
||||||
|
// Acquire GIL before calling Python code
|
||||||
|
py::gil_scoped_acquire acquire;
|
||||||
|
|
||||||
|
MS_LOG(DEBUG) << "Start the summary save callback function for graph " << graph_id << ".";
|
||||||
|
py::list summary_list = py::list();
|
||||||
|
MS_LOG(DEBUG) << "Param list size = " << params_list.size();
|
||||||
|
for (auto& item : params_list) {
|
||||||
|
std::string tag_name = item.first;
|
||||||
|
std::shared_ptr<ge::Tensor> ge_tensor_ptr = std::make_shared<ge::Tensor>(item.second);
|
||||||
|
TensorPtr tensor_ptr = GetMeTensorForSummary(tag_name, ge_tensor_ptr);
|
||||||
|
if (tensor_ptr == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "ConvertGeTensor return tensor is null";
|
||||||
|
}
|
||||||
|
py::dict summary_value_dict;
|
||||||
|
summary_value_dict["name"] = tag_name;
|
||||||
|
summary_value_dict["data"] = tensor_ptr;
|
||||||
|
summary_list.append(summary_value_dict);
|
||||||
|
}
|
||||||
|
|
||||||
|
py::bool_ ret = parse::python_adapter::CallPyFn(PYTHON_MOD_CALLBACK_MODULE, PYTHON_FUN_PROCESS_SUMMARY, summary_list);
|
||||||
|
auto bool_ret = py::cast<bool>(ret);
|
||||||
|
if (!bool_ret) {
|
||||||
|
MS_LOG(ERROR) << "Python checkpoint return false during callback";
|
||||||
|
return Status::FAILED;
|
||||||
|
}
|
||||||
|
MS_LOG(DEBUG) << "End the summary save callback function.";
|
||||||
|
return Status::SUCCESS;
|
||||||
|
}
|
||||||
|
} // namespace callbacks
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,38 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_UTILS_CALLBACKS_GE_H_
|
||||||
|
#define MINDSPORE_CCSRC_UTILS_CALLBACKS_GE_H_
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include "transform/types.h"
|
||||||
|
#include "transform/util.h"
|
||||||
|
#include "ir/meta_tensor.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace callbacks {
|
||||||
|
|
||||||
|
using mindspore::tensor::TensorPtr;
|
||||||
|
|
||||||
|
uint32_t CheckpointSaveCallback(uint32_t, const std::map<std::string, ge::Tensor>&);
|
||||||
|
uint32_t SummarySaveCallback(uint32_t, const std::map<std::string, ge::Tensor>&);
|
||||||
|
|
||||||
|
} // namespace callbacks
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_CCSRC_UTILS_CALLBACKS_GE_H_
|
|
@ -24,6 +24,9 @@
|
||||||
#include "utils/graph_utils.h"
|
#include "utils/graph_utils.h"
|
||||||
#include "session/session_factory.h"
|
#include "session/session_factory.h"
|
||||||
#include "common/utils.h"
|
#include "common/utils.h"
|
||||||
|
#ifdef ENABLE_GE
|
||||||
|
#include "utils/callbacks_ge.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace compile {
|
namespace compile {
|
||||||
|
|
|
@ -22,6 +22,9 @@
|
||||||
#include "pipeline/parse/python_adapter.h"
|
#include "pipeline/parse/python_adapter.h"
|
||||||
#include "transform/df_graph_manager.h"
|
#include "transform/df_graph_manager.h"
|
||||||
#include "debug/draw.h"
|
#include "debug/draw.h"
|
||||||
|
#ifdef ENABLE_GE
|
||||||
|
#include "utils/callbacks_ge.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace python_adapter = mindspore::parse::python_adapter;
|
namespace python_adapter = mindspore::parse::python_adapter;
|
||||||
|
|
Loading…
Reference in New Issue