forked from mindspore-Ecosystem/mindspore
!33991 [GeAdptor]Ge RunGraphAsync
Merge pull request !33991 from weiyang/rungraphasync
This commit is contained in:
commit
e195cba9a6
|
@ -51,6 +51,8 @@ class COMMON_EXPORT GraphRunner {
|
|||
~GraphRunner() { sess_ = nullptr; }
|
||||
Status RunGraph(const RunOptions &options, const std::vector<MeTensorPtr> &inputs, std::vector<MeTensorPtr> *outputs);
|
||||
Status RunGraph(const RunOptions &options, const std::vector<GeTensorPtr> &inputs, std::vector<GeTensorPtr> *outputs);
|
||||
Status RunGraph(const RunOptions &options, const std::vector<GeTensorPtr> &inputs, std::vector<MeTensorPtr> *outputs,
|
||||
const std::vector<TypeId> &me_types);
|
||||
static std::shared_ptr<ge::Session> NewSession(const SessionOptions &sess_options);
|
||||
|
||||
private:
|
||||
|
|
|
@ -105,6 +105,15 @@ class COMMON_EXPORT TransformUtil {
|
|||
* */
|
||||
static MeTensorPtr ConvertGeTensor(const GeTensorPtr &tensor);
|
||||
|
||||
/*
|
||||
* Parameters:
|
||||
* tensor: [GeTensor] the data tensor in GE
|
||||
* me_type: [TypeId] the type of created Me tensor
|
||||
* Return:
|
||||
* [MeTensor] the data tensor in ME
|
||||
* */
|
||||
static MeTensorPtr ConvertGeTensor(const GeTensorPtr &tensor, const TypeId &me_type);
|
||||
|
||||
/*
|
||||
* Parameters:
|
||||
* tensor: [GeTensor] the data tensor in GE
|
||||
|
|
|
@ -1539,10 +1539,10 @@ void InitPipeline() {
|
|||
// open tsd before ge initialize
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
(void)context::InitGe(ms_context);
|
||||
if (!context::OpenTsd(ms_context)) {
|
||||
MS_LOG(EXCEPTION) << "Open tsd failed";
|
||||
}
|
||||
(void)context::InitGe(ms_context);
|
||||
}
|
||||
|
||||
void FinalizeBackend() {
|
||||
|
|
|
@ -38,6 +38,7 @@ namespace pipeline {
|
|||
using Tensor = mindspore::tensor::Tensor;
|
||||
using MetaTensor = mindspore::tensor::MetaTensor;
|
||||
using TensorOrderMap = std::map<std::string, std::shared_ptr<Tensor>>;
|
||||
using mindspore::abstract::AbstractScalar;
|
||||
using mindspore::abstract::AbstractTensor;
|
||||
using mindspore::abstract::AbstractTuple;
|
||||
using mindspore::abstract::AbstractTuplePtr;
|
||||
|
@ -397,14 +398,36 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data,
|
|||
return ExtractGeneralCnodeRet(output_c->abstract(), data, count);
|
||||
}
|
||||
|
||||
std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr &graph, const std::vector<MeTensorPtr> &inputs,
|
||||
const std::string &phase) {
|
||||
void GetMeRetDataType(const AbstractBasePtr &cnode_data, std::vector<TypeId> *me_types) {
|
||||
MS_EXCEPTION_IF_NULL(cnode_data);
|
||||
|
||||
if (cnode_data->isa<AbstractTensor>()) {
|
||||
TypeId me_type = cnode_data->BuildType()->type_id();
|
||||
if (me_type == kObjectTypeTensorType) {
|
||||
me_type = dyn_cast<TensorType>(cnode_data->BuildType())->element()->type_id();
|
||||
me_types->emplace_back(me_type);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (cnode_data->isa<AbstractScalar>()) {
|
||||
TypeId me_type = cnode_data->BuildType()->type_id();
|
||||
me_types->emplace_back(me_type);
|
||||
}
|
||||
auto abstract_tuple = cnode_data->cast<AbstractTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(abstract_tuple);
|
||||
auto elements = abstract_tuple->elements();
|
||||
for (size_t i = 0; i < abstract_tuple->size(); ++i) {
|
||||
GetMeRetDataType(elements[i], me_types);
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<py::object> DoExecGraphAsync(const FuncGraphPtr &graph, const std::vector<MeTensorPtr> &inputs,
|
||||
const std::string &phase) {
|
||||
std::vector<GeTensorPtr> ge_tensors = TransformUtil::ConvertInputTensors(inputs, kOpFormat_NCHW);
|
||||
if (ge_tensors.size() != inputs.size()) {
|
||||
MS_LOG(EXCEPTION) << "Convert me args to ge tensor error.";
|
||||
}
|
||||
|
||||
std::vector<GeTensorPtr> ge_outputs;
|
||||
transform::RunOptions run_options;
|
||||
run_options.name = phase;
|
||||
auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
|
||||
|
@ -412,20 +435,31 @@ std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr &graph, const std::ve
|
|||
MS_LOG(EXCEPTION) << "Can not found GraphRunner.";
|
||||
}
|
||||
|
||||
AnfNodePtr output_node = graph->get_return()->input(1);
|
||||
MS_EXCEPTION_IF_NULL(output_node);
|
||||
std::vector<TypeId> me_types;
|
||||
auto output_c = output_node->cast<CNodePtr>()->abstract();
|
||||
// get output node data types
|
||||
GetMeRetDataType(output_c, &me_types);
|
||||
|
||||
std::vector<MeTensorPtr> me_outputs;
|
||||
{
|
||||
// Release GIL before calling into (potentially long-running) C++ code
|
||||
py::gil_scoped_release release;
|
||||
MS_LOG(DEBUG) << "Run graph begin, inputs size is: " << inputs.size();
|
||||
Status ret = graph_runner->RunGraph(run_options, ge_tensors, &ge_outputs);
|
||||
MS_LOG(DEBUG) << "Run graph finish, outputs size is: " << ge_outputs.size();
|
||||
if (ret != Status::SUCCESS) {
|
||||
MS_LOG(ERROR) << "Exec graph failed";
|
||||
return nullptr;
|
||||
try {
|
||||
Status ret = graph_runner->RunGraph(run_options, ge_tensors, &me_outputs, me_types);
|
||||
MS_LOG(DEBUG) << "Run graph finish, outputs size is: " << me_outputs.size();
|
||||
if (ret != Status::SUCCESS) {
|
||||
MS_LOG(ERROR) << "Exec graph failed";
|
||||
return nullptr;
|
||||
}
|
||||
} catch (const std::exception &ex) {
|
||||
throw(ex);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<MeTensorPtr> me_outputs = TransformUtil::ConvertGeTensors(ge_outputs);
|
||||
if (me_outputs.size() != ge_outputs.size()) {
|
||||
if (me_outputs.size() != me_types.size()) {
|
||||
MS_LOG(WARNING) << "Convert output Ge tensor to Me tensor failed";
|
||||
}
|
||||
|
||||
|
@ -435,9 +469,6 @@ std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr &graph, const std::ve
|
|||
}
|
||||
|
||||
std::shared_ptr<py::object> ret = nullptr;
|
||||
|
||||
AnfNodePtr output_node = graph->get_return()->input(1);
|
||||
MS_EXCEPTION_IF_NULL(output_node);
|
||||
size_t count = 0;
|
||||
py::object oj = StructureOutput(output_node, outputs, &count);
|
||||
ret = std::make_shared<py::object>(oj);
|
||||
|
@ -501,7 +532,7 @@ py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const
|
|||
std::vector<tensor::TensorPtr> inputs;
|
||||
ProcessGeArg(info, args, phase, &inputs);
|
||||
|
||||
std::shared_ptr<py::object> ret = DoExecGraph(anf_graph, inputs, phase);
|
||||
std::shared_ptr<py::object> ret = DoExecGraphAsync(anf_graph, inputs, phase);
|
||||
ConfigManager::GetInstance().ResetConfig();
|
||||
if (ret != nullptr) {
|
||||
return *ret;
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "include/common/utils/callbacks.h"
|
||||
#ifdef ENABLE_D
|
||||
#include "include/common/utils/callbacks_ge.h"
|
||||
#include "common/ge_inner_error_codes.h"
|
||||
#endif
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
|
@ -169,6 +170,98 @@ Status GraphRunner::RunGraph(const RunOptions &options, const std::vector<GeTens
|
|||
return Status::SUCCESS;
|
||||
}
|
||||
|
||||
Status GraphRunner::RunGraph(const RunOptions &options, const std::vector<GeTensorPtr> &inputs,
|
||||
std::vector<MeTensorPtr> *outputs, const std::vector<TypeId> &me_types) {
|
||||
std::string name = options.name;
|
||||
if (name.empty()) {
|
||||
MS_LOG(ERROR) << "The graph name is null";
|
||||
return Status::INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
DfGraphWrapperPtr wrap_ptr = graph_manager_.GetGraphByName(name);
|
||||
if (wrap_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "Get graph form DfGraphManager failed!";
|
||||
return Status::NOT_FOUND;
|
||||
}
|
||||
|
||||
if (wrap_ptr->graph_ptr_ == nullptr) {
|
||||
MS_LOG(WARNING) << "The graph is null";
|
||||
return Status::NOT_FOUND;
|
||||
}
|
||||
|
||||
// call ge::RunGraphAsync() to exec a graph;
|
||||
std::vector<GeTensor> ge_inputs;
|
||||
|
||||
(void)std::transform(inputs.begin(), inputs.end(), std::back_inserter(ge_inputs),
|
||||
[](const GeTensorPtr &i) { return *i; });
|
||||
|
||||
MS_LOG(INFO) << "Run the graph in GE with " << ge_inputs.size() << " inputs";
|
||||
|
||||
struct timeval start_time, end_time;
|
||||
(void)gettimeofday(&start_time, nullptr);
|
||||
|
||||
#ifdef ENABLE_D
|
||||
std::mutex mutex;
|
||||
std::condition_variable condition;
|
||||
bool is_finished = false;
|
||||
bool end_of_sequence = false;
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
auto call_back = [=, &is_finished, &end_of_sequence, &condition](ge::Status ge_status,
|
||||
std::vector<ge::Tensor> &ge_outputs) {
|
||||
if (ge_status == Status::SUCCESS) {
|
||||
if (me_types.size() != ge_outputs.size()) {
|
||||
MS_LOG(EXCEPTION) << "Convert output ge tensor to me tensor failed.";
|
||||
}
|
||||
for (size_t i = 0; i < ge_outputs.size(); ++i) {
|
||||
std::shared_ptr<ge::Tensor> ge_tensor_ptr = std::make_shared<ge::Tensor>(ge_outputs[i]);
|
||||
auto me_tensor = TransformUtil::ConvertGeTensor(ge_tensor_ptr, me_types[i]);
|
||||
if (me_tensor != nullptr) {
|
||||
outputs->emplace_back(me_tensor);
|
||||
}
|
||||
}
|
||||
is_finished = true;
|
||||
} else if (ge_status == ge::END_OF_SEQUENCE) {
|
||||
MS_LOG(WARNING) << "RunAsync out of range: End of sequence.";
|
||||
end_of_sequence = true;
|
||||
} else {
|
||||
MS_LOG(ERROR) << "RunAsync failed.";
|
||||
}
|
||||
condition.notify_all();
|
||||
return;
|
||||
};
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (ms_context->backend_policy() == "ge") {
|
||||
if (sess_ == nullptr) {
|
||||
MS_LOG(ERROR) << "The GE session is null, can't run the graph!";
|
||||
return Status::FAILED;
|
||||
}
|
||||
ge::Status ret = sess_->RunGraphAsync(static_cast<uint32_t>(wrap_ptr->id_), ge_inputs, call_back);
|
||||
if (ret != ge::GRAPH_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Call GE RunGraphAsync Failed, ret is: " << ret;
|
||||
return Status::FAILED;
|
||||
}
|
||||
if (!is_finished) {
|
||||
condition.wait(lock);
|
||||
}
|
||||
if (end_of_sequence) {
|
||||
throw(std::runtime_error("End of sequence."));
|
||||
}
|
||||
if (!is_finished) {
|
||||
MS_LOG(ERROR) << "Call GE RunGraphAsync failed.";
|
||||
return Status::FAILED;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
(void)gettimeofday(&end_time, nullptr);
|
||||
const uint64_t kUSecondInSecond = 1000000;
|
||||
uint64_t cost = kUSecondInSecond * static_cast<uint64_t>(end_time.tv_sec - start_time.tv_sec);
|
||||
cost += static_cast<uint64_t>(end_time.tv_usec - start_time.tv_usec);
|
||||
MS_LOG(INFO) << "Call GE RunGraph Success in " << cost << " us, the GE outputs num is: " << outputs->size();
|
||||
|
||||
return Status::SUCCESS;
|
||||
}
|
||||
|
||||
Status GraphRunner::RunGraph(const RunOptions &options, const std::vector<MeTensorPtr> &inputs,
|
||||
std::vector<MeTensorPtr> *const outputs) {
|
||||
std::vector<GeTensorPtr> ge_inputs;
|
||||
|
|
|
@ -384,6 +384,18 @@ MeTensorPtr TransformUtil::ConvertGeTensor(const GeTensorPtr &ge_tensor) {
|
|||
return GenerateMeTensor(ge_tensor, me_dims, type_id);
|
||||
}
|
||||
|
||||
MeTensorPtr TransformUtil::ConvertGeTensor(const GeTensorPtr &ge_tensor, const TypeId &me_type) {
|
||||
MS_EXCEPTION_IF_NULL(ge_tensor);
|
||||
GeShape ge_shape = ge_tensor->GetTensorDesc().GetShape();
|
||||
vector<int64_t> me_dims = ConvertGeShape(ge_shape);
|
||||
|
||||
if (me_type == MeDataType::kTypeUnknown) {
|
||||
MS_LOG(ERROR) << "Unsupported data type: " << static_cast<int>(me_type);
|
||||
return nullptr;
|
||||
}
|
||||
return GenerateMeTensor(ge_tensor, me_dims, me_type);
|
||||
}
|
||||
|
||||
// if request_dims is empty, use ge tensor's shape,otherwise convert to request shape
|
||||
MeTensorPtr TransformUtil::ConvertGeTensor(const GeTensorPtr ge_tensor, const ShapeVector &request_dims) {
|
||||
MS_EXCEPTION_IF_NULL(ge_tensor);
|
||||
|
|
Loading…
Reference in New Issue