!22123 update pynative profiling codes in fp and bp

Merge pull request !22123 from lvchangquan/profiling_formal
This commit is contained in:
i-robot 2021-08-26 11:10:33 +00:00 committed by Gitee
commit 01ade5857d
11 changed files with 226 additions and 134 deletions

View File

@ -13,12 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/session/executor.h"
#include "backend/session/executor_manager.h"
#include <algorithm>
#include <exception>
#include <set>
#include "runtime/device/kernel_runtime_manager.h"
#include "pipeline/pynative/pynative_profiling.h"
#include "utils/comm_manager.h"
#include "utils/scoped_long_running.h"
#include "pybind_api/ir/tensor_py.h"
@ -150,13 +152,17 @@ void RunGraphTask::Run() {
}
void RunOpTask::Run() {
  // Execute a single forward op through the session, bracketed with
  // Start/End marks so the host-side cost of RunOpImpl appears in the
  // forward profiling CSV.
  PynativeProfiler::SetForwardTimePoint("ForwardRunOpImpl", "Start");
  MS_EXCEPTION_IF_NULL(session_);
  session_->RunOpImpl(graph_info_, op_run_info_, input_tensors_, &outputs_, tensors_mask_);
  PynativeProfiler::SetForwardTimePoint("ForwardRunOpImpl", "End");
}
void RunOpsInGraphTask::Run() {
  // Run all ops of a (backward) graph one by one, bracketed with Start/End
  // marks so the total cost shows up in the backward profiling CSV.
  PynativeProfiler::SetBackwardTimePoint("BackwardRunOpsInGraphImpl", "Start");
  MS_EXCEPTION_IF_NULL(session_);
  session_->RunOpsInGraphImpl(graph_id_, input_tensors_, &outputs_);
  PynativeProfiler::SetBackwardTimePoint("BackwardRunOpsInGraphImpl", "End");
}
void CreateCommGroupTask::Run() {
  // Create the communication group synchronously and record whether it succeeded.
  auto &comm_manager = CommManager::GetInstance();
  result_ = comm_manager.CreateGroupSync(group_name_, ranks_);
}

View File

@ -35,6 +35,7 @@
#include "backend/optimizer/common/common_backend_optimization.h"
#include "backend/optimizer/common/helper.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "pipeline/pynative/pynative_profiling.h"
#include "utils/ms_utils.h"
#include "ir/anf.h"
#include "ir/func_graph_cloner.h"
@ -2232,7 +2233,9 @@ void SessionBasic::RunOpsInGraphImpl(const GraphId &graph_id, const std::vector<
CreateOutputPlaceholder(kernel_graph, inputs, graph_output_info.graph_outputs, &graph_output_info.output_indexes);
std::map<KernelWithIndex, size_t> cnode_refcount;
GetRefCount(kernel_graph.get(), &cnode_refcount);
PynativeProfiler::SetBackwardTimePoint("BackwardBuildOpsInGraph", "Start");
BuildOpsInGraph(graph_id, parameter_index, inputs, cnode_refcount);
PynativeProfiler::SetBackwardTimePoint("BackwardBuildOpsInGraph", "End");
// Clear bucket resources every step
if (kernel_graph->is_bprop()) {
@ -2252,9 +2255,11 @@ void SessionBasic::RunOpsInGraphImpl(const GraphId &graph_id, const std::vector<
// Build and run current single op
VectorRef op_outputs;
PynativeProfiler::SetBackwardRunOpImplOpName(kernel->fullname_with_scope());
PynativeProfiler::SetBackwardTimePoint("BackwardRunOpImpl", "Start");
RunOpImpl(graph_info, &run_info, &input_tensor_info.input_tensors, &op_outputs,
input_tensor_info.input_tensors_mask);
PynativeProfiler::SetBackwardTimePoint("BackwardRunOpImpl", "End");
graph_output_info.graph_output_tensors.clear();
// Handle inputs and outputs of current op
HandleOpInputs(input_tensor_info.input_kernel, &cnode_refcount, &op_output_map);

View File

@ -1,4 +1,4 @@
file(GLOB_RECURSE _PYNATIVE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pynative_execute.cc")
file(GLOB_RECURSE _PYNATIVE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pynative_execute.cc" "pynative_profiling.cc")
if(ENABLE_GE)
file(GLOB_RECURSE _GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pynative_execute_ge.cc")

View File

@ -52,6 +52,7 @@
#include "pipeline/jit/action.h"
#include "pipeline/pynative/base.h"
#include "pipeline/pynative/pynative_profiling.h"
#include "pybind_api/api_register.h"
#include "pybind_api/pybind_patch.h"
#include "vm/transform.h"
@ -68,7 +69,6 @@
#include "debug/anf_ir_dump.h"
#include "runtime/hardware/device_context_manager.h"
#include "runtime/device/pynative_profiling.h"
using mindspore::tensor::TensorPy;
@ -271,12 +271,14 @@ bool GetSignatureType(const PrimitivePyPtr &prim, std::vector<SignatureEnumDType
void PynativeInfer(const PrimitivePyPtr &prim, OpExecInfo *const op_exec_info,
                   const abstract::AbstractBasePtrList &args_spec_list) {
  // Infer the output abstract (type/shape info) of a single primitive in
  // PyNative mode and store it on op_exec_info, bracketed with profiling marks.
  PynativeProfiler::SetForwardTimePoint("ForwardPynativeInfer", "Start");
  MS_LOG(DEBUG) << "Prim " << prim->name() << " input infer " << mindspore::ToString(args_spec_list);
  // Begin/End must bracket EvalOnePrim exactly — presumably attrs added during
  // evaluation are recorded on the primitive between these calls (TODO confirm).
  prim->BeginRecordAddAttr();
  AbstractBasePtr infer_res = EvalOnePrim(prim, args_spec_list)->abstract();
  prim->EndRecordAddAttr();
  op_exec_info->abstract = infer_res;
  MS_LOG(DEBUG) << "Prim " << prim->name() << " infer result " << op_exec_info->abstract->ToString();
  PynativeProfiler::SetForwardTimePoint("ForwardPynativeInfer", "End");
}
std::string GetSingleOpGraphInfo(const OpExecInfoPtr &op_exec_info, const std::vector<tensor::TensorPtr> &input_tensors,
@ -705,9 +707,8 @@ py::object GetDstType(const TypeId &type_id) {
} // namespace
py::object RealRunOp(const py::args &args) {
auto real_run_op_start = GetTime();
auto &profiler_inst = device::PynativeProfiler::GetInstance();
profiler_inst.AddRealRunOpIndex();
PynativeProfiler::SetEnableProfilingFlag();
PynativeProfiler::SetForwardTimePoint("RealRunOp", "Start");
CheckPyNativeContext();
auto executor = PynativeExecutor::GetInstance();
MS_EXCEPTION_IF_NULL(executor);
@ -715,10 +716,9 @@ py::object RealRunOp(const py::args &args) {
MS_EXCEPTION_IF_NULL(op_exec_info);
py::object ret = py::none();
PynativeExecutorTry(executor->forward_executor()->RunOpS, &ret, op_exec_info);
auto real_run_op_end = GetTime();
profiler_inst.SetRealRunOpName(op_exec_info->op_name);
profiler_inst.SetRealRunOpTime(std::make_pair(real_run_op_start, real_run_op_end));
profiler_inst.SingleOpProfilingData();
PynativeProfiler::SetRealRunOpName(op_exec_info->op_name);
PynativeProfiler::SetForwardTimePoint("RealRunOp", "End");
PynativeProfiler::SingleOpForwardHostProfilingData();
return ret;
}
@ -876,6 +876,7 @@ void ForwardExecutor::RunMixedPrecisionCastOp(const OpExecInfoPtr &op_exec_info,
void ForwardExecutor::GetInputsArgsSpec(const OpExecInfoPtr &op_exec_info,
abstract::AbstractBasePtrList *args_spec_list) {
PynativeProfiler::SetForwardTimePoint("GetInputsAbstract", "Start");
MS_EXCEPTION_IF_NULL(args_spec_list);
auto prim = op_exec_info->py_primitive;
for (size_t i = 0; i < op_exec_info->op_inputs.size(); i++) {
@ -907,9 +908,11 @@ void ForwardExecutor::GetInputsArgsSpec(const OpExecInfoPtr &op_exec_info,
}
args_spec_list->emplace_back(abs);
}
PynativeProfiler::SetForwardTimePoint("GetInputsAbstract", "End");
}
AnfNodePtr ForwardExecutor::ConstructForwardGraph(const OpExecInfoPtr &op_exec_info) {
PynativeProfiler::SetForwardTimePoint("ConstructForwardGraph", "Start");
auto prim = op_exec_info->py_primitive;
std::vector<AnfNodePtr> inputs;
std::vector<int64_t> op_masks;
@ -948,6 +951,7 @@ AnfNodePtr ForwardExecutor::ConstructForwardGraph(const OpExecInfoPtr &op_exec_i
cnode = grad()->curr_g()->NewCNodeInOrder(inputs);
MS_LOG(DEBUG) << "Make CNode for " << op_exec_info->op_name << ", new cnode is " << cnode->DebugString();
}
PynativeProfiler::SetForwardTimePoint("ConstructForwardGraph", "End");
return cnode;
}
@ -2577,6 +2581,7 @@ void GradExecutor::CheckNeedCompileGraph() {
}
void GradExecutor::RunGradGraph(py::object *ret, const py::object &cell, const py::tuple &args) {
PynativeProfiler::SetBackwardTimePoint("BackwardRunGradGraph", "Start");
MS_EXCEPTION_IF_NULL(ret);
auto cell_id = GetCellId(cell, args);
MS_LOG(DEBUG) << "Run start cell id " << cell_id;
@ -2621,6 +2626,9 @@ void GradExecutor::RunGradGraph(py::object *ret, const py::object &cell, const p
} else if (GetHighOrderStackSize() >= 2) {
SwitchTopcell();
}
PynativeProfiler::SetBackwardTimePoint("BackwardRunGradGraph", "End");
PynativeProfiler::SingleOpBackwardHostProfilingData();
PynativeProfiler::SingleOpDeviceProfilingData();
}
void GradExecutor::SwitchTopcell() {

View File

@ -0,0 +1,134 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pipeline/pynative/pynative_profiling.h"
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include "utils/profile.h"
#include "utils/ms_context.h"
namespace mindspore {
void PynativeProfiler::SetEnableProfilingFlag() {
  // Read the global profiling switch from MsContext exactly once per process;
  // subsequent calls are no-ops.
  static const bool initialized = []() {
    auto ms_context = MsContext::GetInstance();
    MS_EXCEPTION_IF_NULL(ms_context);
    enable_profiler_flag_ = ms_context->get_param<bool>(MS_CTX_ENABLE_PROFILING);
    return true;
  }();
  (void)initialized;
}
void PynativeProfiler::SetForwardTimePoint(std::string stage_name, std::string flag) {
  // Record a (stage, (flag, timestamp)) tuple for the forward host CSV.
  // No-op when profiling is disabled.
  if (!enable_profiler_flag_) {
    return;
  }
  // The parameters are already by-value copies; move them into the record
  // instead of copying a second time via make_pair/push_back.
  forward_data_.emplace_back(std::move(stage_name), std::make_pair(std::move(flag), GetTime()));
}
void PynativeProfiler::SetRealRunOpName(const std::string &name) {
  // Remember the op name of the current RealRunOp call, but only while
  // profiling is switched on.
  if (enable_profiler_flag_) {
    real_run_op_name_ = name;
  }
}
void PynativeProfiler::SetBackwardTimePoint(std::string stage_name, std::string flag) {
  // Record a (stage, (flag, timestamp)) tuple for the backward host CSV.
  // No-op when profiling is disabled.
  if (!enable_profiler_flag_) {
    return;
  }
  // The parameters are already by-value copies; move them into the record
  // instead of copying a second time via make_pair/push_back.
  backward_data_.emplace_back(std::move(stage_name), std::make_pair(std::move(flag), GetTime()));
}
void PynativeProfiler::SetBackwardRunOpImplOpName(const std::string &name) {
  // Queue the op name so SingleOpBackwardHostProfilingData can pair it with
  // the matching "BackwardRunOpImpl"/"Start" time point.
  if (enable_profiler_flag_) {
    backward_run_op_impl_op_name_.emplace_back(name);
  }
}
void PynativeProfiler::SingleOpForwardHostProfilingData() {
  // Flush the forward host time points of the op just finished: one header
  // row plus one data row, then reset the per-op stage buffer.
  if (!enable_profiler_flag_ || forward_data_.empty()) {
    return;
  }
  // The stream stays open for the whole process; rows accumulate per op.
  static std::ofstream of_host("pynative_forward_host_profiling_data.csv");
  of_host.setf(std::ios::fixed, std::ios::floatfield);
  ++real_run_op_index_;
  // Header row: index, op name, then one column per recorded stage/flag pair.
  of_host << "RealRunOpIndex" << ',' << "RealRunOpName";
  for (const auto &record : forward_data_) {
    of_host << ',' << record.first + record.second.first + "Time(s)";
  }
  of_host << std::endl;
  // Data row carrying the timestamps in the same column order.
  of_host << real_run_op_index_ << ',' << real_run_op_name_;
  for (const auto &record : forward_data_) {
    of_host << ',' << record.second.second;
  }
  of_host << std::endl;
  forward_data_.clear();
}
void PynativeProfiler::SingleOpBackwardHostProfilingData() {
  // Flush the backward host time points of the grad-graph run just finished:
  // one header row plus one data row, then reset both per-run buffers.
  if (!enable_profiler_flag_ || backward_data_.empty()) {
    return;
  }
  // The stream stays open for the whole process; rows accumulate per run.
  static std::ofstream of_host("pynative_backward_host_profiling_data.csv");
  of_host.setf(std::ios::fixed, std::ios::floatfield);
  ++backward_run_grad_graph_index_;
  // Header row: every "BackwardRunOpImpl"/"Start" stage gets an extra op-name
  // column in front of its timestamp column.
  of_host << "BackwardIndex";
  for (const auto &i : backward_data_) {
    if (i.first == "BackwardRunOpImpl" && i.second.first == "Start") {
      of_host << ',' << "BackwardRunOpImplOpName" << ',' << i.first + i.second.first + "Time(s)";
      continue;
    }
    of_host << ',' << i.first + i.second.first + "Time(s)";
  }
  of_host << std::endl;
  of_host << backward_run_grad_graph_index_;
  // Use size_t for index and size: vector::size() returns size_t, and the
  // previous int copy was a narrowing signed/unsigned conversion.
  size_t backward_run_op_impl_op_name_index = 0;
  const size_t backward_run_op_impl_op_name_size = backward_run_op_impl_op_name_.size();
  for (const auto &i : backward_data_) {
    if (i.first == "BackwardRunOpImpl" && i.second.first == "Start") {
      // Each Start mark consumes exactly one queued op name.
      if (backward_run_op_impl_op_name_index >= backward_run_op_impl_op_name_size) {
        MS_LOG(EXCEPTION) << "backward_run_op_impl_op_name_index is bigger than backward_run_op_impl_op_name_size";
      }
      of_host << ',' << backward_run_op_impl_op_name_[backward_run_op_impl_op_name_index++] << ',' << i.second.second;
      continue;
    }
    of_host << ',' << i.second.second;
  }
  of_host << std::endl;
  backward_data_.clear();
  backward_run_op_impl_op_name_.clear();
}
void PynativeProfiler::SingleOpDeviceProfilingData() {
  // Flush the recorded (op name, launch start, launch end) tuples to the
  // device CSV, one 1-based row per launched kernel, then reset the buffer.
  if (!enable_profiler_flag_ || op_name_launch_time_vec_.empty()) {
    return;
  }
  static std::ofstream of_device("pynative_device_profiling_data.csv");
  of_device.setf(std::ios::fixed, std::ios::floatfield);
  of_device << "DeviceIndex" << ',' << "op_name" << ',' << "LaunchStartTime(s)" << ',' << "LaunchEndTime(s)"
            << std::endl;
  size_t row = 0;
  for (const auto &entry : op_name_launch_time_vec_) {
    of_device << ++row << ',' << entry.first << ',' << entry.second.first << ',' << entry.second.second << std::endl;
  }
  op_name_launch_time_vec_.clear();
}
} // namespace mindspore

View File

@ -0,0 +1,58 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Fix: the include guard was copied from the deleted runtime/device header;
// rename it to match this file's pipeline/pynative location.
#ifndef MINDSPORE_CCSRC_PIPELINE_PYNATIVE_PYNATIVE_PROFILING_H_
#define MINDSPORE_CCSRC_PIPELINE_PYNATIVE_PYNATIVE_PROFILING_H_
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include <unordered_map>
namespace mindspore {
// Process-wide collector of PyNative host/device profiling time points.
// All state is static (C++17 inline static members); instances cannot be
// created. Setters are no-ops until SetEnableProfilingFlag() reads the
// MS_CTX_ENABLE_PROFILING switch. The SingleOp*ProfilingData() methods dump
// the buffered records to CSV files and clear the buffers.
// NOTE(review): the static buffers are not synchronized — presumably all
// callers run on the PyNative execution threads; confirm before adding
// concurrent producers.
class PynativeProfiler {
 public:
  // Record a (stage, (flag, now)) point for the forward host CSV.
  static void SetForwardTimePoint(std::string stage_name, std::string flag);
  // Remember the op name of the current RealRunOp call.
  static void SetRealRunOpName(const std::string &name);
  // Record a (stage, (flag, now)) point for the backward host CSV.
  static void SetBackwardTimePoint(std::string stage_name, std::string flag);
  // Queue an op name to pair with the next "BackwardRunOpImpl" Start mark.
  static void SetBackwardRunOpImplOpName(const std::string &name);
  // Record (op name, (launch start, launch end)) for the device CSV.
  static void SetOpNameAndLaunchTime(const std::pair<std::string, std::pair<double, double>> &name_start_end) {
    op_name_launch_time_vec_.push_back(name_start_end);
  }
  // Latch the MS_CTX_ENABLE_PROFILING switch; subsequent calls are no-ops.
  static void SetEnableProfilingFlag();
  // Dump and clear the buffered records.
  static void SingleOpForwardHostProfilingData();
  static void SingleOpBackwardHostProfilingData();
  static void SingleOpDeviceProfilingData();

 private:
  PynativeProfiler() = default;
  ~PynativeProfiler() = default;
  PynativeProfiler(const PynativeProfiler &) = delete;
  PynativeProfiler &operator=(const PynativeProfiler &) = delete;
  inline static bool enable_profiler_flag_ = false;
  inline static int real_run_op_index_ = 0;
  inline static std::string real_run_op_name_;
  // (stage name, (flag, timestamp)) records for the forward/backward CSVs.
  inline static std::vector<std::pair<std::string, std::pair<std::string, double>>> forward_data_;
  inline static int backward_run_grad_graph_index_ = 0;
  inline static std::vector<std::string> backward_run_op_impl_op_name_;
  inline static std::vector<std::pair<std::string, std::pair<std::string, double>>> backward_data_;
  // (op name, (launch start, launch end)) records for the device CSV.
  inline static std::vector<std::pair<std::string, std::pair<double, double>>> op_name_launch_time_vec_;
};
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PIPELINE_PYNATIVE_PYNATIVE_PROFILING_H_

View File

@ -1,7 +1,7 @@
file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc"
"kernel_info.cc" "executor/dynamic_kernel.cc" "executor/executor_callback.cc" "kernel_runtime.cc"
"memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc"
"bucket.cc" "launch_kernel.cc" "launch_mul.cc" "pynative_profiling.cc"
"bucket.cc" "launch_kernel.cc" "launch_mul.cc"
)
if("${ENABLE_HIDDEN}" STREQUAL "OFF")

View File

@ -32,7 +32,7 @@
#include "utils/utils.h"
#include "frontend/parallel/context.h"
#include "debug/env_config_parser.h"
#include "runtime/device/pynative_profiling.h"
#include "pipeline/pynative/pynative_profiling.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#include "ps/ps_cache/ps_cache_manager.h"
#endif
@ -66,7 +66,6 @@ std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph *graph) {
}
} // namespace
constexpr size_t kMinInputSize = 2;
KernelRuntime::~KernelRuntime() {}
bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; }
@ -987,10 +986,9 @@ bool KernelRuntime::LaunchKernelWithPynativeProfiling(kernel::KernelMod *kernel_
end->SyncEvent();
start->ElapsedTime(&cost_time, end.get());
auto launch_end_time = GetTime();
auto &profiler_inst = PynativeProfiler::GetInstance();
double launch_start_time = launch_end_time - cost_time / kBasicTimeTransferUnit;
auto op_launch_start_time_end_time = std::make_pair(launch_start_time, launch_end_time);
profiler_inst.SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_time_end_time));
PynativeProfiler::SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_time_end_time));
if (!ret) {
MS_LOG(EXCEPTION) << "Launch kernel failed, kernel name is : " << op_name;
}

View File

@ -1,59 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/device/pynative_profiling.h"
#include <iostream>
#include <fstream>
#include <utility>
#include <memory>
#include <string>
#include "utils/profile.h"
#include "utils/utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace device {
// NOTE(review): constructor of the old device::PynativeProfiler singleton,
// deleted by this commit in favour of the static pipeline/pynative profiler.
PynativeProfiler::PynativeProfiler() {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  // Cache the global profiling switch once at singleton construction.
  enable_profiler_flag = ms_context->get_param<bool>(MS_CTX_ENABLE_PROFILING);
}
// NOTE(review): old combined host+device dump, deleted by this commit and
// replaced by the split SingleOp*ProfilingData methods.
void PynativeProfiler::SingleOpProfilingData() {
  if (!enable_profiler_flag) {
    return;
  }
  // The stream stays open across calls; rows from successive ops append.
  static std::ofstream of("pynative_forward_profiling_data.csv");
  of.setf(std::ios::fixed, std::ios::floatfield);
  // Write the column header only for the first RealRunOp call.
  if (real_run_op_index_ == 1) {
    of << "RealRunOpIndex" << ',' << "RealRunOpName" << ',' << "OpName" << ',' << "RealRunOpStartTime(s)" << ','
       << "OpDeviceStartTime(s)" << ',' << "OpDeviceEndTime(s)" << ',' << "RealRunOpEndTime(s)" << std::endl;
  }
  // Ops that launched no kernels still get one row, with "nopnode"
  // placeholders in the per-kernel columns.
  if (op_name_launch_start_time_end_time_vec_.empty()) {
    of << real_run_op_index_ << ',' << real_run_op_name_ << ',' << "nopnode" << ','
       << real_run_op_start_time_end_time_.first << ',' << "nopnode" << ',' << "nopnode" << ','
       << real_run_op_start_time_end_time_.second << std::endl;
    return;
  }
  // One row per launched kernel, all sharing the RealRunOp start/end times.
  for (const auto &i : op_name_launch_start_time_end_time_vec_) {
    of << real_run_op_index_ << ',' << real_run_op_name_ << ',' << i.first << ','
       << real_run_op_start_time_end_time_.first << ',' << i.second.first << ',' << i.second.second << ','
       << real_run_op_start_time_end_time_.second << std::endl;
  }
  op_name_launch_start_time_end_time_vec_.clear();
}
} // namespace device
} // namespace mindspore

View File

@ -1,57 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include <unordered_map>
namespace mindspore {
namespace device {
// NOTE(review): old singleton-based profiler under runtime/device, deleted by
// this commit in favour of the static class under pipeline/pynative.
class PynativeProfiler {
 public:
  // Meyers singleton: one process-wide instance, lazily constructed.
  static PynativeProfiler &GetInstance() {
    static PynativeProfiler instance;
    return instance;
  }
  // Advance the 1-based index of the current RealRunOp call.
  void AddRealRunOpIndex() { ++real_run_op_index_; }
  void SetRealRunOpName(const std::string &name) { real_run_op_name_ = name; }
  // Host-side wall-clock (start, end) of the whole RealRunOp call.
  void SetRealRunOpTime(const std::pair<double, double> &start_end) { real_run_op_start_time_end_time_ = start_end; }
  // Append an (op name, (launch start, launch end)) record for one kernel.
  void SetOpNameAndLaunchTime(const std::pair<std::string, std::pair<double, double>> &name_start_end) {
    op_name_launch_start_time_end_time_vec_.push_back(name_start_end);
  }
  // Dump the buffered records of the current op to CSV and clear them.
  void SingleOpProfilingData();
 private:
  PynativeProfiler();
  ~PynativeProfiler() = default;
  PynativeProfiler(const PynativeProfiler &) = delete;
  PynativeProfiler &operator=(const PynativeProfiler &) = delete;
  // Latched from MS_CTX_ENABLE_PROFILING in the constructor.
  bool enable_profiler_flag = false;
  int real_run_op_index_ = 0;
  std::string real_run_op_name_;
  std::pair<double, double> real_run_op_start_time_end_time_;
  std::vector<std::pair<std::string, std::pair<double, double>>> op_name_launch_start_time_end_time_vec_;
};
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_

View File

@ -17,7 +17,7 @@
#include "runtime/hardware/gpu/gpu_device_context.h"
#include <dlfcn.h>
#include <utility>
#include "runtime/device/pynative_profiling.h"
#include "pipeline/pynative/pynative_profiling.h"
#include "runtime/device/gpu/kernel_info_setter.h"
#include "runtime/device/gpu/gpu_kernel_build.h"
#include "runtime/device/gpu/gpu_device_address.h"
@ -437,9 +437,8 @@ bool GPUDeviceContext::LaunchKernelWithProfiling(const CNodePtr &kernel, const s
profiler_inst->OpDataProducerEnd();
auto op_launch_start_end_time = profiler_inst->GetSingleOpLaunchTime();
auto &pynative_profiler = PynativeProfiler::GetInstance();
std::string op_name = kernel->fullname_with_scope();
pynative_profiler.SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_end_time));
PynativeProfiler::SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_end_time));
if (profiler_inst->GetSyncEnableFlag()) {
CHECK_RET_WITH_RETURN_ERROR(SyncStream(), "Profiler SyncStream failed.");