!22123 Update the PyNative profiling code in the forward and backward passes
Merge pull request !22123 from lvchangquan/profiling_formal
This commit is contained in:
commit
01ade5857d
|
@ -13,12 +13,14 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/session/executor.h"
|
||||
#include "backend/session/executor_manager.h"
|
||||
#include <algorithm>
|
||||
#include <exception>
|
||||
#include <set>
|
||||
#include "runtime/device/kernel_runtime_manager.h"
|
||||
#include "pipeline/pynative/pynative_profiling.h"
|
||||
#include "utils/comm_manager.h"
|
||||
#include "utils/scoped_long_running.h"
|
||||
#include "pybind_api/ir/tensor_py.h"
|
||||
|
@ -150,13 +152,17 @@ void RunGraphTask::Run() {
|
|||
}
|
||||
|
||||
void RunOpTask::Run() {
  // Bracket the single-op forward execution with profiler samples so the
  // PyNative profiler can report host time spent inside RunOpImpl.
  PynativeProfiler::SetForwardTimePoint("ForwardRunOpImpl", "Start");
  MS_EXCEPTION_IF_NULL(session_);
  session_->RunOpImpl(graph_info_, op_run_info_, input_tensors_, &outputs_, tensors_mask_);
  PynativeProfiler::SetForwardTimePoint("ForwardRunOpImpl", "End");
}
|
||||
|
||||
void RunOpsInGraphTask::Run() {
  // Bracket the backward op-by-op graph execution with profiler samples so
  // the PyNative profiler can report host time spent inside RunOpsInGraphImpl.
  PynativeProfiler::SetBackwardTimePoint("BackwardRunOpsInGraphImpl", "Start");
  MS_EXCEPTION_IF_NULL(session_);
  session_->RunOpsInGraphImpl(graph_id_, input_tensors_, &outputs_);
  PynativeProfiler::SetBackwardTimePoint("BackwardRunOpsInGraphImpl", "End");
}
|
||||
|
||||
// Synchronously creates the communication group and stores success in result_.
void CreateCommGroupTask::Run() { result_ = CommManager::GetInstance().CreateGroupSync(group_name_, ranks_); }
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "backend/optimizer/common/common_backend_optimization.h"
|
||||
#include "backend/optimizer/common/helper.h"
|
||||
#include "runtime/device/kernel_runtime_manager.h"
|
||||
#include "pipeline/pynative/pynative_profiling.h"
|
||||
#include "utils/ms_utils.h"
|
||||
#include "ir/anf.h"
|
||||
#include "ir/func_graph_cloner.h"
|
||||
|
@ -2232,7 +2233,9 @@ void SessionBasic::RunOpsInGraphImpl(const GraphId &graph_id, const std::vector<
|
|||
CreateOutputPlaceholder(kernel_graph, inputs, graph_output_info.graph_outputs, &graph_output_info.output_indexes);
|
||||
std::map<KernelWithIndex, size_t> cnode_refcount;
|
||||
GetRefCount(kernel_graph.get(), &cnode_refcount);
|
||||
PynativeProfiler::SetBackwardTimePoint("BackwardBuildOpsInGraph", "Start");
|
||||
BuildOpsInGraph(graph_id, parameter_index, inputs, cnode_refcount);
|
||||
PynativeProfiler::SetBackwardTimePoint("BackwardBuildOpsInGraph", "End");
|
||||
|
||||
// Clear bucket resources every step
|
||||
if (kernel_graph->is_bprop()) {
|
||||
|
@ -2252,9 +2255,11 @@ void SessionBasic::RunOpsInGraphImpl(const GraphId &graph_id, const std::vector<
|
|||
|
||||
// Build and run current single op
|
||||
VectorRef op_outputs;
|
||||
PynativeProfiler::SetBackwardRunOpImplOpName(kernel->fullname_with_scope());
|
||||
PynativeProfiler::SetBackwardTimePoint("BackwardRunOpImpl", "Start");
|
||||
RunOpImpl(graph_info, &run_info, &input_tensor_info.input_tensors, &op_outputs,
|
||||
input_tensor_info.input_tensors_mask);
|
||||
|
||||
PynativeProfiler::SetBackwardTimePoint("BackwardRunOpImpl", "End");
|
||||
graph_output_info.graph_output_tensors.clear();
|
||||
// Handle inputs and outputs of current op
|
||||
HandleOpInputs(input_tensor_info.input_kernel, &cnode_refcount, &op_output_map);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
file(GLOB_RECURSE _PYNATIVE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pynative_execute.cc")
|
||||
file(GLOB_RECURSE _PYNATIVE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pynative_execute.cc" "pynative_profiling.cc")
|
||||
|
||||
if(ENABLE_GE)
|
||||
file(GLOB_RECURSE _GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pynative_execute_ge.cc")
|
||||
|
|
|
@ -52,6 +52,7 @@
|
|||
#include "pipeline/jit/action.h"
|
||||
|
||||
#include "pipeline/pynative/base.h"
|
||||
#include "pipeline/pynative/pynative_profiling.h"
|
||||
#include "pybind_api/api_register.h"
|
||||
#include "pybind_api/pybind_patch.h"
|
||||
#include "vm/transform.h"
|
||||
|
@ -68,7 +69,6 @@
|
|||
|
||||
#include "debug/anf_ir_dump.h"
|
||||
#include "runtime/hardware/device_context_manager.h"
|
||||
#include "runtime/device/pynative_profiling.h"
|
||||
|
||||
using mindspore::tensor::TensorPy;
|
||||
|
||||
|
@ -271,12 +271,14 @@ bool GetSignatureType(const PrimitivePyPtr &prim, std::vector<SignatureEnumDType
|
|||
|
||||
// Runs abstract (shape/type) inference for a single primitive in PyNative
// mode and stores the result on op_exec_info->abstract.  The whole call is
// bracketed with "ForwardPynativeInfer" Start/End profiler samples.
void PynativeInfer(const PrimitivePyPtr &prim, OpExecInfo *const op_exec_info,
                   const abstract::AbstractBasePtrList &args_spec_list) {
  PynativeProfiler::SetForwardTimePoint("ForwardPynativeInfer", "Start");
  MS_LOG(DEBUG) << "Prim " << prim->name() << " input infer " << mindspore::ToString(args_spec_list);
  // Capture any attributes the primitive adds while inference runs.
  prim->BeginRecordAddAttr();
  AbstractBasePtr infer_res = EvalOnePrim(prim, args_spec_list)->abstract();
  prim->EndRecordAddAttr();
  op_exec_info->abstract = infer_res;
  MS_LOG(DEBUG) << "Prim " << prim->name() << " infer result " << op_exec_info->abstract->ToString();
  PynativeProfiler::SetForwardTimePoint("ForwardPynativeInfer", "End");
}
|
||||
|
||||
std::string GetSingleOpGraphInfo(const OpExecInfoPtr &op_exec_info, const std::vector<tensor::TensorPtr> &input_tensors,
|
||||
|
@ -705,9 +707,8 @@ py::object GetDstType(const TypeId &type_id) {
|
|||
} // namespace
|
||||
|
||||
py::object RealRunOp(const py::args &args) {
|
||||
auto real_run_op_start = GetTime();
|
||||
auto &profiler_inst = device::PynativeProfiler::GetInstance();
|
||||
profiler_inst.AddRealRunOpIndex();
|
||||
PynativeProfiler::SetEnableProfilingFlag();
|
||||
PynativeProfiler::SetForwardTimePoint("RealRunOp", "Start");
|
||||
CheckPyNativeContext();
|
||||
auto executor = PynativeExecutor::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(executor);
|
||||
|
@ -715,10 +716,9 @@ py::object RealRunOp(const py::args &args) {
|
|||
MS_EXCEPTION_IF_NULL(op_exec_info);
|
||||
py::object ret = py::none();
|
||||
PynativeExecutorTry(executor->forward_executor()->RunOpS, &ret, op_exec_info);
|
||||
auto real_run_op_end = GetTime();
|
||||
profiler_inst.SetRealRunOpName(op_exec_info->op_name);
|
||||
profiler_inst.SetRealRunOpTime(std::make_pair(real_run_op_start, real_run_op_end));
|
||||
profiler_inst.SingleOpProfilingData();
|
||||
PynativeProfiler::SetRealRunOpName(op_exec_info->op_name);
|
||||
PynativeProfiler::SetForwardTimePoint("RealRunOp", "End");
|
||||
PynativeProfiler::SingleOpForwardHostProfilingData();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -876,6 +876,7 @@ void ForwardExecutor::RunMixedPrecisionCastOp(const OpExecInfoPtr &op_exec_info,
|
|||
|
||||
void ForwardExecutor::GetInputsArgsSpec(const OpExecInfoPtr &op_exec_info,
|
||||
abstract::AbstractBasePtrList *args_spec_list) {
|
||||
PynativeProfiler::SetForwardTimePoint("GetInputsAbstract", "Start");
|
||||
MS_EXCEPTION_IF_NULL(args_spec_list);
|
||||
auto prim = op_exec_info->py_primitive;
|
||||
for (size_t i = 0; i < op_exec_info->op_inputs.size(); i++) {
|
||||
|
@ -907,9 +908,11 @@ void ForwardExecutor::GetInputsArgsSpec(const OpExecInfoPtr &op_exec_info,
|
|||
}
|
||||
args_spec_list->emplace_back(abs);
|
||||
}
|
||||
PynativeProfiler::SetForwardTimePoint("GetInputsAbstract", "End");
|
||||
}
|
||||
|
||||
AnfNodePtr ForwardExecutor::ConstructForwardGraph(const OpExecInfoPtr &op_exec_info) {
|
||||
PynativeProfiler::SetForwardTimePoint("ConstructForwardGraph", "Start");
|
||||
auto prim = op_exec_info->py_primitive;
|
||||
std::vector<AnfNodePtr> inputs;
|
||||
std::vector<int64_t> op_masks;
|
||||
|
@ -948,6 +951,7 @@ AnfNodePtr ForwardExecutor::ConstructForwardGraph(const OpExecInfoPtr &op_exec_i
|
|||
cnode = grad()->curr_g()->NewCNodeInOrder(inputs);
|
||||
MS_LOG(DEBUG) << "Make CNode for " << op_exec_info->op_name << ", new cnode is " << cnode->DebugString();
|
||||
}
|
||||
PynativeProfiler::SetForwardTimePoint("ConstructForwardGraph", "End");
|
||||
return cnode;
|
||||
}
|
||||
|
||||
|
@ -2577,6 +2581,7 @@ void GradExecutor::CheckNeedCompileGraph() {
|
|||
}
|
||||
|
||||
void GradExecutor::RunGradGraph(py::object *ret, const py::object &cell, const py::tuple &args) {
|
||||
PynativeProfiler::SetBackwardTimePoint("BackwardRunGradGraph", "Start");
|
||||
MS_EXCEPTION_IF_NULL(ret);
|
||||
auto cell_id = GetCellId(cell, args);
|
||||
MS_LOG(DEBUG) << "Run start cell id " << cell_id;
|
||||
|
@ -2621,6 +2626,9 @@ void GradExecutor::RunGradGraph(py::object *ret, const py::object &cell, const p
|
|||
} else if (GetHighOrderStackSize() >= 2) {
|
||||
SwitchTopcell();
|
||||
}
|
||||
PynativeProfiler::SetBackwardTimePoint("BackwardRunGradGraph", "End");
|
||||
PynativeProfiler::SingleOpBackwardHostProfilingData();
|
||||
PynativeProfiler::SingleOpDeviceProfilingData();
|
||||
}
|
||||
|
||||
void GradExecutor::SwitchTopcell() {
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "pipeline/pynative/pynative_profiling.h"
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include "utils/profile.h"
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
void PynativeProfiler::SetEnableProfilingFlag() {
|
||||
static bool flag = false;
|
||||
if (flag) {
|
||||
return;
|
||||
}
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
enable_profiler_flag_ = ms_context->get_param<bool>(MS_CTX_ENABLE_PROFILING);
|
||||
flag = true;
|
||||
}
|
||||
|
||||
void PynativeProfiler::SetForwardTimePoint(std::string stage_name, std::string flag) {
|
||||
if (!enable_profiler_flag_) {
|
||||
return;
|
||||
}
|
||||
forward_data_.push_back(std::make_pair(stage_name, std::make_pair(flag, GetTime())));
|
||||
}
|
||||
|
||||
void PynativeProfiler::SetRealRunOpName(const std::string &name) {
|
||||
if (!enable_profiler_flag_) {
|
||||
return;
|
||||
}
|
||||
real_run_op_name_ = name;
|
||||
}
|
||||
|
||||
void PynativeProfiler::SetBackwardTimePoint(std::string stage_name, std::string flag) {
|
||||
if (!enable_profiler_flag_) {
|
||||
return;
|
||||
}
|
||||
backward_data_.push_back(std::make_pair(stage_name, std::make_pair(flag, GetTime())));
|
||||
}
|
||||
|
||||
void PynativeProfiler::SetBackwardRunOpImplOpName(const std::string &name) {
|
||||
if (!enable_profiler_flag_) {
|
||||
return;
|
||||
}
|
||||
backward_run_op_impl_op_name_.push_back(name);
|
||||
}
|
||||
|
||||
void PynativeProfiler::SingleOpForwardHostProfilingData() {
|
||||
if (!enable_profiler_flag_ || forward_data_.empty()) {
|
||||
return;
|
||||
}
|
||||
static std::ofstream of_host("pynative_forward_host_profiling_data.csv");
|
||||
of_host.setf(std::ios::fixed, std::ios::floatfield);
|
||||
++real_run_op_index_;
|
||||
of_host << "RealRunOpIndex" << ',' << "RealRunOpName";
|
||||
for (const auto &i : forward_data_) {
|
||||
of_host << ',' << i.first + i.second.first + "Time(s)";
|
||||
}
|
||||
of_host << std::endl;
|
||||
of_host << real_run_op_index_ << ',' << real_run_op_name_;
|
||||
for (const auto &i : forward_data_) {
|
||||
of_host << ',' << i.second.second;
|
||||
}
|
||||
of_host << std::endl;
|
||||
forward_data_.clear();
|
||||
}
|
||||
|
||||
void PynativeProfiler::SingleOpBackwardHostProfilingData() {
|
||||
if (!enable_profiler_flag_ || backward_data_.empty()) {
|
||||
return;
|
||||
}
|
||||
static std::ofstream of_host("pynative_backward_host_profiling_data.csv");
|
||||
of_host.setf(std::ios::fixed, std::ios::floatfield);
|
||||
++backward_run_grad_graph_index_;
|
||||
of_host << "BackwardIndex";
|
||||
for (const auto &i : backward_data_) {
|
||||
if (i.first == "BackwardRunOpImpl" && i.second.first == "Start") {
|
||||
of_host << ',' << "BackwardRunOpImplOpName" << ',' << i.first + i.second.first + "Time(s)";
|
||||
continue;
|
||||
}
|
||||
of_host << ',' << i.first + i.second.first + "Time(s)";
|
||||
}
|
||||
of_host << std::endl;
|
||||
of_host << backward_run_grad_graph_index_;
|
||||
int backward_run_op_impl_op_name_index = 0;
|
||||
int backward_run_op_impl_op_name_size = backward_run_op_impl_op_name_.size();
|
||||
for (const auto &i : backward_data_) {
|
||||
if (i.first == "BackwardRunOpImpl" && i.second.first == "Start") {
|
||||
if (backward_run_op_impl_op_name_index >= backward_run_op_impl_op_name_size) {
|
||||
MS_LOG(EXCEPTION) << "backward_run_op_impl_op_name_index is bigger than backward_run_op_impl_op_name_size";
|
||||
}
|
||||
of_host << ',' << backward_run_op_impl_op_name_[backward_run_op_impl_op_name_index++] << ',' << i.second.second;
|
||||
continue;
|
||||
}
|
||||
of_host << ',' << i.second.second;
|
||||
}
|
||||
of_host << std::endl;
|
||||
backward_data_.clear();
|
||||
backward_run_op_impl_op_name_.clear();
|
||||
}
|
||||
|
||||
void PynativeProfiler::SingleOpDeviceProfilingData() {
|
||||
if (!enable_profiler_flag_ || op_name_launch_time_vec_.empty()) {
|
||||
return;
|
||||
}
|
||||
static std::ofstream of_device("pynative_device_profiling_data.csv");
|
||||
of_device.setf(std::ios::fixed, std::ios::floatfield);
|
||||
of_device << "DeviceIndex" << ',' << "op_name" << ',' << "LaunchStartTime(s)" << ',' << "LaunchEndTime(s)"
|
||||
<< std::endl;
|
||||
for (size_t i = 1; i <= op_name_launch_time_vec_.size(); ++i) {
|
||||
of_device << i << ',' << op_name_launch_time_vec_[i - 1].first << ','
|
||||
<< op_name_launch_time_vec_[i - 1].second.first << ',' << op_name_launch_time_vec_[i - 1].second.second
|
||||
<< std::endl;
|
||||
}
|
||||
op_name_launch_time_vec_.clear();
|
||||
}
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,58 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace mindspore {
|
||||
// Lightweight host/device profiler for PyNative (eager) mode.  All state is
// static: Set* methods buffer samples in per-phase vectors and the
// SingleOp*ProfilingData() methods flush them to CSV files.
// NOTE(review): no locking anywhere — appears to assume single-threaded
// recording; confirm against callers.
class PynativeProfiler {
 public:
  // Record a (stage, flag, timestamp) sample of the forward pass.
  static void SetForwardTimePoint(std::string stage_name, std::string flag);
  // Cache the op name of the current RealRunOp call.
  static void SetRealRunOpName(const std::string &name);
  // Record a (stage, flag, timestamp) sample of the backward pass.
  static void SetBackwardTimePoint(std::string stage_name, std::string flag);
  // Record the name of an op launched by BackwardRunOpImpl.
  static void SetBackwardRunOpImplOpName(const std::string &name);
  // Record a kernel's name together with its device launch start/end times.
  static void SetOpNameAndLaunchTime(const std::pair<std::string, std::pair<double, double>> &name_start_end) {
    // Guard like every other setter: without it the vector grows without
    // bound when profiling is disabled, because SingleOpDeviceProfilingData
    // early-returns and never clears it.
    if (!enable_profiler_flag_) {
      return;
    }
    op_name_launch_time_vec_.push_back(name_start_end);
  }

  // Read MS_CTX_ENABLE_PROFILING once and cache it.
  static void SetEnableProfilingFlag();
  // Flush the buffered samples to their CSV files and clear the buffers.
  static void SingleOpForwardHostProfilingData();
  static void SingleOpBackwardHostProfilingData();
  static void SingleOpDeviceProfilingData();

 private:
  PynativeProfiler() = default;
  ~PynativeProfiler() = default;
  PynativeProfiler(const PynativeProfiler &) = delete;
  PynativeProfiler &operator=(const PynativeProfiler &) = delete;
  inline static bool enable_profiler_flag_ = false;
  inline static int real_run_op_index_ = 0;
  inline static std::string real_run_op_name_;
  // (stage_name, (flag, timestamp)) samples, in recording order.
  inline static std::vector<std::pair<std::string, std::pair<std::string, double>>> forward_data_;
  inline static int backward_run_grad_graph_index_ = 0;
  inline static std::vector<std::string> backward_run_op_impl_op_name_;
  inline static std::vector<std::pair<std::string, std::pair<std::string, double>>> backward_data_;
  // (op_name, (launch_start, launch_end)) samples from kernel launches.
  inline static std::vector<std::pair<std::string, std::pair<double, double>>> op_name_launch_time_vec_;
};
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_
|
|
@ -1,7 +1,7 @@
|
|||
file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc"
|
||||
"kernel_info.cc" "executor/dynamic_kernel.cc" "executor/executor_callback.cc" "kernel_runtime.cc"
|
||||
"memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc"
|
||||
"bucket.cc" "launch_kernel.cc" "launch_mul.cc" "pynative_profiling.cc"
|
||||
"bucket.cc" "launch_kernel.cc" "launch_mul.cc"
|
||||
)
|
||||
|
||||
if("${ENABLE_HIDDEN}" STREQUAL "OFF")
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
#include "utils/utils.h"
|
||||
#include "frontend/parallel/context.h"
|
||||
#include "debug/env_config_parser.h"
|
||||
#include "runtime/device/pynative_profiling.h"
|
||||
#include "pipeline/pynative/pynative_profiling.h"
|
||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
||||
#include "ps/ps_cache/ps_cache_manager.h"
|
||||
#endif
|
||||
|
@ -66,7 +66,6 @@ std::vector<AnfNodePtr> GetGraphInputs(const session::KernelGraph *graph) {
|
|||
}
|
||||
} // namespace
|
||||
constexpr size_t kMinInputSize = 2;
|
||||
|
||||
KernelRuntime::~KernelRuntime() {}
|
||||
|
||||
bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; }
|
||||
|
@ -987,10 +986,9 @@ bool KernelRuntime::LaunchKernelWithPynativeProfiling(kernel::KernelMod *kernel_
|
|||
end->SyncEvent();
|
||||
start->ElapsedTime(&cost_time, end.get());
|
||||
auto launch_end_time = GetTime();
|
||||
auto &profiler_inst = PynativeProfiler::GetInstance();
|
||||
double launch_start_time = launch_end_time - cost_time / kBasicTimeTransferUnit;
|
||||
auto op_launch_start_time_end_time = std::make_pair(launch_start_time, launch_end_time);
|
||||
profiler_inst.SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_time_end_time));
|
||||
PynativeProfiler::SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_time_end_time));
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Launch kernel failed, kernel name is : " << op_name;
|
||||
}
|
||||
|
|
|
@ -1,59 +0,0 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/pynative_profiling.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "utils/profile.h"
|
||||
#include "utils/utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
PynativeProfiler::PynativeProfiler() {
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
enable_profiler_flag = ms_context->get_param<bool>(MS_CTX_ENABLE_PROFILING);
|
||||
}
|
||||
|
||||
void PynativeProfiler::SingleOpProfilingData() {
|
||||
if (!enable_profiler_flag) {
|
||||
return;
|
||||
}
|
||||
static std::ofstream of("pynative_forward_profiling_data.csv");
|
||||
of.setf(std::ios::fixed, std::ios::floatfield);
|
||||
if (real_run_op_index_ == 1) {
|
||||
of << "RealRunOpIndex" << ',' << "RealRunOpName" << ',' << "OpName" << ',' << "RealRunOpStartTime(s)" << ','
|
||||
<< "OpDeviceStartTime(s)" << ',' << "OpDeviceEndTime(s)" << ',' << "RealRunOpEndTime(s)" << std::endl;
|
||||
}
|
||||
if (op_name_launch_start_time_end_time_vec_.empty()) {
|
||||
of << real_run_op_index_ << ',' << real_run_op_name_ << ',' << "nopnode" << ','
|
||||
<< real_run_op_start_time_end_time_.first << ',' << "nopnode" << ',' << "nopnode" << ','
|
||||
<< real_run_op_start_time_end_time_.second << std::endl;
|
||||
return;
|
||||
}
|
||||
for (const auto &i : op_name_launch_start_time_end_time_vec_) {
|
||||
of << real_run_op_index_ << ',' << real_run_op_name_ << ',' << i.first << ','
|
||||
<< real_run_op_start_time_end_time_.first << ',' << i.second.first << ',' << i.second.second << ','
|
||||
<< real_run_op_start_time_end_time_.second << std::endl;
|
||||
}
|
||||
op_name_launch_start_time_end_time_vec_.clear();
|
||||
}
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -1,57 +0,0 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
class PynativeProfiler {
|
||||
public:
|
||||
static PynativeProfiler &GetInstance() {
|
||||
static PynativeProfiler instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
void AddRealRunOpIndex() { ++real_run_op_index_; }
|
||||
void SetRealRunOpName(const std::string &name) { real_run_op_name_ = name; }
|
||||
void SetRealRunOpTime(const std::pair<double, double> &start_end) { real_run_op_start_time_end_time_ = start_end; }
|
||||
void SetOpNameAndLaunchTime(const std::pair<std::string, std::pair<double, double>> &name_start_end) {
|
||||
op_name_launch_start_time_end_time_vec_.push_back(name_start_end);
|
||||
}
|
||||
void SingleOpProfilingData();
|
||||
|
||||
private:
|
||||
PynativeProfiler();
|
||||
~PynativeProfiler() = default;
|
||||
PynativeProfiler(const PynativeProfiler &) = delete;
|
||||
PynativeProfiler &operator=(const PynativeProfiler &) = delete;
|
||||
bool enable_profiler_flag = false;
|
||||
int real_run_op_index_ = 0;
|
||||
std::string real_run_op_name_;
|
||||
std::pair<double, double> real_run_op_start_time_end_time_;
|
||||
std::vector<std::pair<std::string, std::pair<double, double>>> op_name_launch_start_time_end_time_vec_;
|
||||
};
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_PYNATIVE_PROFILING_H_
|
|
@ -17,7 +17,7 @@
|
|||
#include "runtime/hardware/gpu/gpu_device_context.h"
|
||||
#include <dlfcn.h>
|
||||
#include <utility>
|
||||
#include "runtime/device/pynative_profiling.h"
|
||||
#include "pipeline/pynative/pynative_profiling.h"
|
||||
#include "runtime/device/gpu/kernel_info_setter.h"
|
||||
#include "runtime/device/gpu/gpu_kernel_build.h"
|
||||
#include "runtime/device/gpu/gpu_device_address.h"
|
||||
|
@ -437,9 +437,8 @@ bool GPUDeviceContext::LaunchKernelWithProfiling(const CNodePtr &kernel, const s
|
|||
profiler_inst->OpDataProducerEnd();
|
||||
|
||||
auto op_launch_start_end_time = profiler_inst->GetSingleOpLaunchTime();
|
||||
auto &pynative_profiler = PynativeProfiler::GetInstance();
|
||||
std::string op_name = kernel->fullname_with_scope();
|
||||
pynative_profiler.SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_end_time));
|
||||
PynativeProfiler::SetOpNameAndLaunchTime(std::make_pair(op_name, op_launch_start_end_time));
|
||||
|
||||
if (profiler_inst->GetSyncEnableFlag()) {
|
||||
CHECK_RET_WITH_RETURN_ERROR(SyncStream(), "Profiler SyncStream failed.");
|
||||
|
|
Loading…
Reference in New Issue