forked from mindspore-Ecosystem/mindspore
Fit Gpu LoopCount for profiler module v2
This commit is contained in:
parent
191b1a4fba
commit
4c78a47184
|
@ -21,4 +21,12 @@ if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU))
|
|||
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
|
||||
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
|
||||
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
|
||||
endif()
|
||||
|
||||
# When ENABLE_TESTCASES is set: collect the sources matching "device/profiling.cc"
# (paths relative to this directory), tag them with the profiler SUBMODULE_ID
# compile definition, and build them into the _mindspore_profiler_obj object library.
if(ENABLE_TESTCASES)
|
||||
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"device/profiling.cc")
|
||||
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
|
||||
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
|
||||
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
|
||||
endif()
|
|
@ -24,6 +24,8 @@
|
|||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace cpu {
|
||||
// Definition of the static CpuDataSaver instance that CpuDataSaver::GetInstance() returns;
// it is created with std::make_shared when this static member is initialized.
std::shared_ptr<CpuDataSaver> CpuDataSaver::cpu_data_saver_inst_ = std::make_shared<CpuDataSaver>();
|
||||
|
||||
void CpuDataSaver::WriteFile(const std::string out_path_dir) {
|
||||
if (op_detail_infos_.empty() || op_type_infos_.empty()) {
|
||||
MS_LOG(INFO) << "No cpu operation detail infos to write.";
|
||||
|
@ -38,6 +40,10 @@ void CpuDataSaver::WriteFile(const std::string out_path_dir) {
|
|||
WriteOpType(out_path_dir);
|
||||
WriteOpTimestamp(out_path_dir);
|
||||
}
|
||||
|
||||
// Hands out this saver's op-timestamp table by mutable reference, so callers
// can both look entries up and adjust them in place.
OpTimestampInfo &CpuDataSaver::GetOpTimeStampInfo() {
  return op_timestamps_map_;
}
|
||||
|
||||
// Accessor for the static CpuDataSaver instance; returns a reference to the
// shared_ptr itself and performs no null check.
std::shared_ptr<CpuDataSaver> &CpuDataSaver::GetInstance() {
  return cpu_data_saver_inst_;
}
|
||||
} // namespace cpu
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -29,6 +29,8 @@ namespace profiler {
|
|||
namespace cpu {
|
||||
class CpuDataSaver : public DataSaver {
|
||||
public:
|
||||
static std::shared_ptr<CpuDataSaver> &GetInstance();
|
||||
|
||||
CpuDataSaver() = default;
|
||||
|
||||
~CpuDataSaver() = default;
|
||||
|
@ -37,7 +39,12 @@ class CpuDataSaver : public DataSaver {
|
|||
|
||||
CpuDataSaver &operator=(const CpuDataSaver &) = delete;
|
||||
|
||||
OpTimestampInfo &GetOpTimeStampInfo();
|
||||
|
||||
void WriteFile(const std::string out_path);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<CpuDataSaver> cpu_data_saver_inst_;
|
||||
};
|
||||
} // namespace cpu
|
||||
} // namespace profiler
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "pybind_api/api_register.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
@ -62,13 +63,24 @@ void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t
|
|||
op_time_start_ = GetHostMonoTimeStamp();
|
||||
op_time_mono_start_ = GetHostMonoTimeStamp();
|
||||
SetRunTimeData(op_name, pid);
|
||||
|
||||
#if ENABLE_GPU
|
||||
if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
|
||||
// For heterogeneous scene, record op name to gpu_profiler_inst.
|
||||
auto gpu_profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
|
||||
// For a CPU-only network there is no GPU profiler, so do not raise an exception.
|
||||
if (gpu_profiler_inst && gpu_profiler_inst->GetEnableFlag()) {
|
||||
gpu_profiler_inst->RecordOneStepStartEndInfo(op_name);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Closes the timing window for the op currently being profiled on the host.
//
// Reads the host monotonic clock into op_time_stop_, derives the elapsed time from
// op_time_start_ (scaled by kNanosecondToMillisecond), logs it, and records it through
// both Profiler::SetRunTimeData overloads: once as a bare elapsed time and once together
// with the op's monotonic start timestamp (op_time_mono_start_).
void CPUProfiler::OpDataProducerEnd() {
  op_time_stop_ = GetHostMonoTimeStamp();
  const float op_time_elapsed = (op_time_stop_ - op_time_start_) / kNanosecondToMillisecond;
  // Logged once, labelled in ms to match the kNanosecondToMillisecond scaling above.
  // A second copy of this line labelled "(us)" reported the same value under the wrong unit.
  MS_LOG(DEBUG) << "Host Time Elapsed(ms)," << op_name_ << "," << op_time_elapsed;
  Profiler::SetRunTimeData(op_name_, op_time_elapsed);
  Profiler::SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed);
}
|
||||
|
@ -83,9 +95,10 @@ void CPUProfiler::SaveProfileData() {
|
|||
if (profile_data_path_.empty()) {
|
||||
MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
|
||||
} else {
|
||||
CpuDataSaver dataSaver;
|
||||
dataSaver.ParseOpInfo(op_info_map_);
|
||||
dataSaver.WriteFile(profile_data_path_);
|
||||
auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
|
||||
cpu_data_saver_inst->ParseOpInfo(op_info_map_);
|
||||
cpu_data_saver_inst->WriteFile(profile_data_path_);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,9 @@
|
|||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "profiler/device/profiling.h"
|
||||
#if ENABLE_GPU
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
|
|
@ -105,6 +105,22 @@ void GpuDataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfo
|
|||
}
|
||||
}
|
||||
|
||||
void GpuDataSaver::CpuProfilingTimeSynchronizedToGpu(const BaseTime &start_time) {
|
||||
auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
|
||||
auto &cpu_op_timestamps_map = cpu_data_saver_inst->GetOpTimeStampInfo();
|
||||
auto cpu_op_iter = cpu_op_timestamps_map.begin();
|
||||
while (cpu_op_iter != cpu_op_timestamps_map.end()) {
|
||||
for (auto &time_iter : cpu_op_iter->second) {
|
||||
time_iter.start_timestamp =
|
||||
time_iter.start_timestamp - start_time.host_start_monotonic_raw_time + start_time.gpu_start_time;
|
||||
// time unit from ms to us.
|
||||
time_iter.duration *= kTimeUnit;
|
||||
}
|
||||
cpu_op_iter++;
|
||||
}
|
||||
}
|
||||
|
||||
void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) {
|
||||
if (out_path_dir.empty()) {
|
||||
MS_LOG(WARNING) << "Output directory. Ignore the writing data.";
|
||||
|
@ -122,6 +138,7 @@ void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_tim
|
|||
WriteActivity(out_path_dir);
|
||||
WriteOpTimestamp(out_path_dir);
|
||||
WriteStartTime(out_path_dir, start_time);
|
||||
CpuProfilingTimeSynchronizedToGpu(start_time);
|
||||
if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
|
||||
WriteStepTraceAsyncLaunchKernel(out_path_dir);
|
||||
} else {
|
||||
|
@ -176,6 +193,11 @@ void GpuDataSaver::WriteStepTraceAsyncLaunchKernel(const std::string &saver_base
|
|||
return;
|
||||
}
|
||||
|
||||
// cpu profiler information.
|
||||
auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
|
||||
auto &cpu_op_timestamps_map = cpu_data_saver_inst->GetOpTimeStampInfo();
|
||||
|
||||
// write step trace time info into file
|
||||
uint32_t step = 0;
|
||||
uint64_t duration;
|
||||
|
@ -188,17 +210,32 @@ void GpuDataSaver::WriteStepTraceAsyncLaunchKernel(const std::string &saver_base
|
|||
auto bp_end_op_timestamp = op_timestamps_map_.find(step_trace_op_name_.trace_bp_end);
|
||||
auto iter_end_op_timestamp = op_timestamps_map_.find(iter_end_op_name);
|
||||
|
||||
if (iter_end_op_name == "Default/InitDataSetQueue-op0") continue;
|
||||
// if iter_start/fp_start/iter_end op is executed on cpu, update it.
|
||||
if (iter_start_op_timestamp == op_timestamps_map_.end()) {
|
||||
iter_start_op_timestamp = cpu_op_timestamps_map.find(iter_start_op_name);
|
||||
}
|
||||
if (fp_op_timestamp == op_timestamps_map_.end()) {
|
||||
fp_op_timestamp = cpu_op_timestamps_map.find(fp_op_name);
|
||||
}
|
||||
if (iter_end_op_timestamp == op_timestamps_map_.end()) {
|
||||
iter_end_op_timestamp = cpu_op_timestamps_map.find(iter_end_op_name);
|
||||
}
|
||||
|
||||
if (iter_end_op_name == "Default/InitDataSetQueue-op0") {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (iter_start_op_timestamp == op_timestamps_map_.end() || fp_op_timestamp == op_timestamps_map_.end() ||
|
||||
iter_end_op_timestamp == op_timestamps_map_.end() || bp_end_op_timestamp == op_timestamps_map_.end()) {
|
||||
MS_LOG(ERROR) << "[profiling step trace] failed, do not find " << fp_op_name << " or " << iter_end_op_name << "or"
|
||||
<< step_trace_op_name_.trace_bp_end;
|
||||
MS_LOG(ERROR) << "[profiling step trace] failed, do not find \"" << fp_op_name << "\" or \"" << iter_end_op_name
|
||||
<< "\" or \"" << step_trace_op_name_.trace_bp_end << "\"";
|
||||
ofs.close();
|
||||
return;
|
||||
}
|
||||
if (iter_start_op_timestamp->second.size() <= step || fp_op_timestamp->second.size() <= step ||
|
||||
iter_end_op_timestamp->second.size() <= step || bp_end_op_timestamp->second.size() <= step) {
|
||||
MS_LOG(ERROR) << "[profiling step trace] the number of fp/bp/iter_end timestamp not enough";
|
||||
ofs.close();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -215,13 +252,15 @@ void GpuDataSaver::WriteStepTraceAsyncLaunchKernel(const std::string &saver_base
|
|||
// convert the time unit from 1ns to 10ns (keep the same with ascend)
|
||||
auto iter_op_timestamp = op_timestamps_map_.find(op_name);
|
||||
if (iter_op_timestamp == op_timestamps_map_.end()) {
|
||||
MS_LOG(ERROR) << "[profiling step trace] failed, do not find " << fp_op_name << " or " << iter_end_op_name
|
||||
<< "or" << step_trace_op_name_.trace_bp_end;
|
||||
MS_LOG(ERROR) << "[profiling step trace] failed, do not find \"" << fp_op_name << "\" or " << iter_end_op_name
|
||||
<< "\" or \"" << step_trace_op_name_.trace_bp_end << "\"";
|
||||
ofs.close();
|
||||
return;
|
||||
}
|
||||
|
||||
if (iter_op_timestamp->second.size() <= step) {
|
||||
MS_LOG(ERROR) << "[profiling step trace] the number of communication op timestamp not enough";
|
||||
ofs.close();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -278,6 +317,7 @@ void GpuDataSaver::WriteStepTrace(const std::string &saver_base_dir) {
|
|||
ofs << std::endl;
|
||||
} catch (const std::exception &e) {
|
||||
MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
|
||||
ofs.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -302,6 +342,7 @@ void GpuDataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseT
|
|||
ofs << "gpu_start_time(ns): " << start_time.gpu_start_time << std::endl;
|
||||
} catch (const std::exception &e) {
|
||||
MS_LOG(ERROR) << "Write " << file_path << "failed:" << e.what();
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
ofs.close();
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <string>
|
||||
#include <memory>
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#include "profiler/device/cpu/cpu_data_saver.h"
|
||||
#include "profiler/device/data_saver.h"
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
@ -93,6 +94,8 @@ class GpuDataSaver : public DataSaver {
|
|||
|
||||
void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time);
|
||||
|
||||
void CpuProfilingTimeSynchronizedToGpu(const BaseTime &start_time);
|
||||
|
||||
AllActivityInfos activity_infos_;
|
||||
ProfilingTraceInfo step_trace_op_name_from_graph_;
|
||||
ProfilingTraceInfo step_trace_op_name_;
|
||||
|
|
|
@ -211,7 +211,10 @@ std::string GetKernelFuncName(std::string kernel_name) {
|
|||
return kernel_name.substr(func_name_begin_iter);
|
||||
}
|
||||
|
||||
std::shared_ptr<GPUProfiler> &GPUProfiler::GetInstance() { return profiler_inst_; }
|
||||
std::shared_ptr<GPUProfiler> &GPUProfiler::GetInstance() {
|
||||
MS_EXCEPTION_IF_NULL(profiler_inst_);
|
||||
return profiler_inst_;
|
||||
}
|
||||
|
||||
void GPUProfiler::SyncEnable(const bool enable_flag) {
|
||||
MS_LOG(INFO) << "GPU Profiler synchronous enable flag:" << enable_flag;
|
||||
|
@ -434,7 +437,9 @@ void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) {
|
|||
}
|
||||
SetRunTimeData(op_name, stream);
|
||||
|
||||
if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) RecordOneStepStartEndInfo(op_name);
|
||||
if (MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
|
||||
RecordOneStepStartEndInfo(op_name);
|
||||
}
|
||||
}
|
||||
|
||||
void GPUProfiler::OpDataProducerEnd() {
|
||||
|
@ -490,7 +495,7 @@ void GPUProfiler::SaveProfileData() {
|
|||
if (profile_data_path_.empty()) {
|
||||
MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
|
||||
} else {
|
||||
GpuDataSaver dataSaver(step_trace_op_name, all_step_start_end_info_);
|
||||
GpuDataSaver dataSaver(step_trace_op_name_, all_step_start_end_info_);
|
||||
dataSaver.ParseOpInfo(op_info_map_);
|
||||
dataSaver.ParseEvent(events_);
|
||||
dataSaver.WriteFile(profile_data_path_, base_time_);
|
||||
|
@ -498,30 +503,6 @@ void GPUProfiler::SaveProfileData() {
|
|||
}
|
||||
}
|
||||
|
||||
void GPUProfiler::RecordOneStepStartEndInfo() {
|
||||
std::lock_guard<std::mutex> locker(record_mutex_);
|
||||
step_start_end_info_.iter_end_timestamp = GetCUPTITimeStamp();
|
||||
all_step_start_end_info_.push_back(step_start_end_info_);
|
||||
step_start_end_info_.iter_start_op_name = "";
|
||||
step_start_end_info_.fp_start_op_name = "";
|
||||
}
|
||||
|
||||
void GPUProfiler::RecordOneStepStartEndInfo(const std::string op_name) {
|
||||
std::lock_guard<std::mutex> locker(record_mutex_);
|
||||
if (step_start_end_info_.iter_start_op_name.empty()) {
|
||||
step_start_end_info_.iter_start_op_name = op_name;
|
||||
step_start_end_info_.fp_start_op_name = op_name;
|
||||
}
|
||||
|
||||
std::string fp_start_op_name = step_start_end_info_.fp_start_op_name;
|
||||
|
||||
auto op_type_begin_iter = fp_start_op_name.rfind('/') + 1;
|
||||
auto op_type_end_iter = fp_start_op_name.rfind('-');
|
||||
auto op_type = fp_start_op_name.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
|
||||
if (op_type == "InitDataSetQueue" || op_type == "GetNext") step_start_end_info_.fp_start_op_name = op_name;
|
||||
step_start_end_info_.iter_end_op_name = op_name;
|
||||
}
|
||||
|
||||
void GPUProfiler::ClearInst() {
|
||||
op_info_map_.clear();
|
||||
op_name_map_.clear();
|
||||
|
@ -701,7 +682,7 @@ void GPUProfiler::HandleActivityRecord(CUpti_Activity *record) {
|
|||
AddEvent(std::move(profilingData));
|
||||
}
|
||||
|
||||
// Stores the step-trace op names supplied by the caller in step_trace_op_name_.
// This is the single definition of the setter: a second definition of the same
// function, assigning to a member spelled without the trailing underscore, sat
// directly above it, and two definitions cannot coexist.
void GPUProfiler::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name_ = trace_op_name; }
|
||||
|
||||
void GPUProfiler::RegisterProfilingOp(std::shared_ptr<ProfilingOp> node) {
|
||||
PROFILER_ERROR_IF_NULLPTR(node);
|
||||
|
|
|
@ -132,7 +132,6 @@ class GPUProfiler : public Profiler {
|
|||
void ProcessEvents();
|
||||
void RegisterProfilingOp(std::shared_ptr<ProfilingOp> node);
|
||||
void SetStepTraceOpName(ProfilingTraceInfo trace_op_name);
|
||||
void RecordOneStepStartEndInfo();
|
||||
std::string ProfileDataPath() const { return profile_data_path_; }
|
||||
|
||||
private:
|
||||
|
@ -143,7 +142,6 @@ class GPUProfiler : public Profiler {
|
|||
void AddEvent(Event &&event);
|
||||
void SetRunTimeData(const std::string &op_name, void *stream);
|
||||
void FixOpNameByCorrelationId(Event *event);
|
||||
void RecordOneStepStartEndInfo(std::string op_name);
|
||||
|
||||
static std::shared_ptr<GPUProfiler> profiler_inst_;
|
||||
bool enable_flag_ = false;
|
||||
|
@ -175,8 +173,7 @@ class GPUProfiler : public Profiler {
|
|||
uint64_t op_cupti_time_start_;
|
||||
std::string profile_data_path_;
|
||||
std::map<std::string, std::shared_ptr<ProfilingOp>> profiling_op_;
|
||||
ProfilingTraceInfo step_trace_op_name;
|
||||
std::mutex record_mutex_;
|
||||
ProfilingTraceInfo step_trace_op_name_;
|
||||
};
|
||||
} // namespace gpu
|
||||
} // namespace profiler
|
||||
|
|
|
@ -41,6 +41,7 @@ ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const sessio
|
|||
}
|
||||
|
||||
ProfilingTraceInfo empty_info;
|
||||
ProfilingTraceInfo last_graph_profiling_trace = profiling_trace;
|
||||
profiling_trace = empty_info;
|
||||
SetTraceIterEnd(cnode_exec_order);
|
||||
SetTraceFpStart(cnode_exec_order);
|
||||
|
@ -49,7 +50,13 @@ ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const sessio
|
|||
|
||||
OutputStepTraceOpNameStatus();
|
||||
is_first_step_map_[graph_ptr->graph_id()] = false;
|
||||
return profiling_trace;
|
||||
|
||||
// If current graph has only one node, the bp_end will be empty, so select the last graph node.
|
||||
if (profiling_trace.trace_bp_end != "") {
|
||||
return profiling_trace;
|
||||
} else {
|
||||
return last_graph_profiling_trace;
|
||||
}
|
||||
}
|
||||
|
||||
void ProfilingUtils::OutputStepTraceOpNameStatus() {
|
||||
|
|
|
@ -19,13 +19,17 @@
|
|||
#include <cxxabi.h>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include "profiler/device/cpu/cpu_data_saver.h"
|
||||
#include "pybind_api/api_register.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/utils.h"
|
||||
#if ENABLE_GPU
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
// Definition of the static ProfilerManager instance that ProfilerManager::GetInstance() returns;
// it is created with std::make_shared when this static member is initialized.
std::shared_ptr<ProfilerManager> ProfilerManager::profiler_manager_inst_ = std::make_shared<ProfilerManager>();
|
||||
|
||||
uint64_t Profiler::GetHostMonoTimeStamp() const {
|
||||
struct timespec ts;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
@ -57,5 +61,51 @@ void Profiler::SetRunTimeData(const std::string &op_name, const uint64_t start,
|
|||
iter->second.start_duration.emplace_back(StartDuration({start, duration}));
|
||||
}
|
||||
}
|
||||
|
||||
void Profiler::RecordOneStepStartEndInfo() {
|
||||
std::lock_guard<std::mutex> locker(record_mutex_);
|
||||
all_step_start_end_info_.push_back(step_start_end_info_);
|
||||
step_start_end_info_.iter_start_op_name = "";
|
||||
step_start_end_info_.fp_start_op_name = "";
|
||||
}
|
||||
|
||||
void Profiler::RecordOneStepStartEndInfo(const std::string op_name) {
|
||||
std::lock_guard<std::mutex> locker(record_mutex_);
|
||||
if (step_start_end_info_.iter_start_op_name.empty()) {
|
||||
step_start_end_info_.iter_start_op_name = op_name;
|
||||
step_start_end_info_.fp_start_op_name = op_name;
|
||||
}
|
||||
|
||||
std::string fp_start_op_name = step_start_end_info_.fp_start_op_name;
|
||||
|
||||
auto op_type_begin_iter = fp_start_op_name.rfind('/') + 1;
|
||||
auto op_type_end_iter = fp_start_op_name.rfind('-');
|
||||
auto op_type = fp_start_op_name.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
|
||||
if (op_type == "InitDataSetQueue" || op_type == "GetNext") {
|
||||
step_start_end_info_.fp_start_op_name = op_name;
|
||||
}
|
||||
step_start_end_info_.iter_end_op_name = op_name;
|
||||
}
|
||||
|
||||
// Returns the static ProfilerManager instance by reference, after passing it
// through MS_EXCEPTION_IF_NULL so callers can dereference the result directly.
std::shared_ptr<ProfilerManager> &ProfilerManager::GetInstance() {
  MS_EXCEPTION_IF_NULL(profiler_manager_inst_);
  return profiler_manager_inst_;
}
|
||||
|
||||
// Reports whether a recorder actor is wanted for profiling.
// With ENABLE_GPU this is the GPU profiler's enable flag; otherwise it is always false.
bool ProfilerManager::GetEnableRecorderActorFlag() {
#if ENABLE_GPU
  return profiler::gpu::GPUProfiler::GetInstance()->GetEnableFlag();
#else
  return false;
#endif
}
|
||||
|
||||
// Step-end notification entry point.
// With ENABLE_GPU, asks the GPU profiler to record the finished step, but only when
// that profiler's enable flag is set; without ENABLE_GPU the function does nothing.
void ProfilerManager::RecordOneStepStartEndInfo() {
#if ENABLE_GPU
  auto gpu_profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
  if (!gpu_profiler_inst->GetEnableFlag()) {
    return;
  }
  gpu_profiler_inst->RecordOneStepStartEndInfo();
#endif
}
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -37,8 +37,6 @@ struct OneStepStartEndInfo {
|
|||
std::string iter_start_op_name;
|
||||
std::string fp_start_op_name;
|
||||
std::string iter_end_op_name;
|
||||
uint64_t fp_start_timestamp = 0l;
|
||||
uint64_t iter_end_timestamp = 0l;
|
||||
};
|
||||
|
||||
struct OpInfo {
|
||||
|
@ -54,6 +52,20 @@ struct OpInfo {
|
|||
uint32_t pid;
|
||||
};
|
||||
|
||||
// Device-independent entry point to the profilers, reached through a static
// shared instance (see GetInstance). Non-copyable.
class ProfilerManager {
 public:
  ProfilerManager() = default;
  ~ProfilerManager() = default;

  // Copying is disabled; use GetInstance() to reach the shared instance.
  ProfilerManager(const ProfilerManager &) = delete;
  ProfilerManager &operator=(const ProfilerManager &) = delete;

  // Returns a reference to the shared_ptr holding the static instance.
  static std::shared_ptr<ProfilerManager> &GetInstance();

  // Whether a recorder actor should be created for profiling.
  bool GetEnableRecorderActorFlag();

  // Notifies the profilers that one step has finished.
  void RecordOneStepStartEndInfo();

 private:
  static std::shared_ptr<ProfilerManager> profiler_manager_inst_;
};
|
||||
|
||||
class Profiler {
|
||||
public:
|
||||
Profiler() = default;
|
||||
|
@ -63,8 +75,10 @@ class Profiler {
|
|||
virtual void Stop() = 0;
|
||||
virtual void StepProfilingEnable(const bool enable_flag) = 0;
|
||||
virtual void OpDataProducerEnd() = 0;
|
||||
void RecordOneStepStartEndInfo();
|
||||
bool GetEnableFlag() const { return enable_flag_; }
|
||||
std::string ProfileDataPath() const { return profile_data_path_; }
|
||||
void RecordOneStepStartEndInfo(std::string op_name);
|
||||
|
||||
protected:
|
||||
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
|
||||
|
@ -77,6 +91,7 @@ class Profiler {
|
|||
std::unordered_map<std::string, OpInfo> op_info_map_;
|
||||
OneStepStartEndInfo step_start_end_info_;
|
||||
std::vector<OneStepStartEndInfo> all_step_start_end_info_;
|
||||
std::mutex record_mutex_;
|
||||
};
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -65,12 +65,8 @@ void RecorderActor::RecordInfo(const std::string op_name, const KernelLaunchInfo
|
|||
// Step-end hook of the recorder actor.
//
// Validates op_context and forwards the end-of-step notification to the
// ProfilerManager instance. ProfilerManager::RecordOneStepStartEndInfo() already
// performs the ENABLE_GPU-guarded, enable-flag-checked call into the GPU profiler,
// so the profiler is reached only through the manager; invoking the GPU profiler
// directly here as well would append every step twice.
void RecorderActor::RecordOnStepEnd(OpContext<DeviceTensor> *op_context) {
  MS_EXCEPTION_IF_NULL(op_context);
  // todo clear
  // Record iter_start, fp_start and iter_end op name and timestamp at the step end. (GPU)
  profiler::ProfilerManager::GetInstance()->RecordOneStepStartEndInfo();
}
|
||||
|
||||
} // namespace runtime
|
||||
|
|
|
@ -22,9 +22,7 @@
|
|||
#include "runtime/framework/actor/actor_common.h"
|
||||
#include "runtime/framework/device_tensor_store.h"
|
||||
#include "runtime/hardware/device_context.h"
|
||||
#if ENABLE_GPU
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#endif
|
||||
#include "profiler/device/profiling.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace runtime {
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#ifdef ENABLE_DEBUGGER
|
||||
#include "debug/debugger/debugger.h"
|
||||
#endif
|
||||
#include "profiler/device/profiling.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace runtime {
|
||||
|
@ -456,6 +457,13 @@ void GraphScheduler::Initialize() {
|
|||
<< ", the computed OMP thread number : " << OMP_thread_num
|
||||
<< ", the used OMP thread number : " << stoi(OMP_thread_num_used);
|
||||
|
||||
BuildAndScheduleGlobalActor();
|
||||
}
|
||||
|
||||
void GraphScheduler::BuildAndScheduleGlobalActor() {
|
||||
auto actorMgr = ActorMgr::GetActorMgrRef();
|
||||
MS_EXCEPTION_IF_NULL(actorMgr);
|
||||
|
||||
// Create and schedule memory manager actor.
|
||||
auto memory_manager_actor = std::make_shared<MemoryManagerActor>();
|
||||
MS_EXCEPTION_IF_NULL(memory_manager_actor);
|
||||
|
@ -465,9 +473,17 @@ void GraphScheduler::Initialize() {
|
|||
// Bind single thread to response to memory alloc and free quickly.
|
||||
(void)actorMgr->Spawn(base_actor, false);
|
||||
|
||||
// Create and schedule recorder actor.
|
||||
// Create and schedule recorder actor.
|
||||
bool recorder_actor_need = false;
|
||||
if (profiler::ProfilerManager::GetInstance()->GetEnableRecorderActorFlag()) {
|
||||
recorder_actor_need = true;
|
||||
}
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
if (mindspore::RecorderManager::Instance().RdrEnable()) {
|
||||
recorder_actor_need = true;
|
||||
}
|
||||
#endif
|
||||
if (recorder_actor_need) {
|
||||
auto recorder_actor = std::make_shared<RecorderActor>();
|
||||
MS_EXCEPTION_IF_NULL(recorder_actor);
|
||||
recorder_aid_ = &(recorder_actor->GetAID());
|
||||
|
@ -475,7 +491,7 @@ void GraphScheduler::Initialize() {
|
|||
base_recorder_actor->set_thread_pool(thread_pool_);
|
||||
(void)actorMgr->Spawn(base_recorder_actor, true);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Create and schedule debug actor.
|
||||
bool debugger_actor_need = DumpJsonParser::GetInstance().e2e_dump_enabled();
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
|
|
|
@ -170,6 +170,9 @@ class GraphScheduler {
|
|||
~GraphScheduler() = default;
|
||||
DISABLE_COPY_AND_ASSIGN(GraphScheduler);
|
||||
|
||||
// The Global actors contain memory manager actor, recorder actor and debug actor.
|
||||
void BuildAndScheduleGlobalActor();
|
||||
|
||||
// Transform the nodes of graph to actors.
|
||||
ActorSetPtr Build(const GraphCompilerInfo &graph_compiler_info);
|
||||
// Link actors to DAG through the edge connection of graph and graph execution strategy.
|
||||
|
|
|
@ -561,7 +561,7 @@ class BaseTimelineGenerator:
|
|||
os.chmod(display_file_path, stat.S_IREAD | stat.S_IWRITE)
|
||||
except (IOError, OSError) as err:
|
||||
logger.error('Error occurred when write timeline display file: %s', err)
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
def write_timeline_summary(self):
|
||||
"""Write timeline summary to json."""
|
||||
|
@ -578,7 +578,7 @@ class BaseTimelineGenerator:
|
|||
os.chmod(timeline_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
|
||||
except (IOError, OSError) as err:
|
||||
logger.error('Error occurred when write timeline summary file: %s', err)
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
@staticmethod
|
||||
def _update_num_of_streams(timeline, stream_count_dict):
|
||||
|
@ -852,7 +852,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|||
gpu_start_time = int(lines[1].strip().split(':')[-1])
|
||||
except (IOError, OSError) as err:
|
||||
logger.error(f'Error occurred when read {start_time_file_path}: {err}')
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
time_diff = gpu_start_time - host_monotonic_start_time
|
||||
for idx, time_item in enumerate(timeline_list):
|
||||
|
@ -874,7 +874,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|||
op_timeline_list.append(line_list)
|
||||
except (IOError, OSError) as err:
|
||||
logger.error('Error occurred when load operator timeline data intermediate file: %s', err)
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
return op_timeline_list
|
||||
|
||||
|
@ -898,7 +898,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|||
activity_timeline_list.append(line_list)
|
||||
except (IOError, OSError) as err:
|
||||
logger.error('Error occurred when load activity timeline data intermediate file: %s', err)
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
return activity_timeline_list
|
||||
|
||||
|
@ -967,7 +967,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|||
step_num += 1
|
||||
except (IOError, OSError) as err:
|
||||
logger.error(f'Error occurred when read {step_trace_profiling_path}: {err}')
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
return step_time_list
|
||||
|
||||
|
@ -985,7 +985,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|||
return bool(len(first_string.split(',')) == 2)
|
||||
except (IOError, OSError) as err:
|
||||
logger.error(f'Error occurred when read {step_trace_profiling_path}: {err}')
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
|
||||
class AscendTimelineGenerator(BaseTimelineGenerator):
|
||||
|
@ -1018,7 +1018,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|||
timeline_list.append(line_list)
|
||||
except (IOError, OSError) as err:
|
||||
logger.error('Error occurred when read timeline intermediate file: %s', err)
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
return timeline_list
|
||||
|
||||
|
@ -1139,7 +1139,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|||
host_monotonic = int(lines[2].strip().split(':')[1])
|
||||
except (IOError, OSError) as err:
|
||||
logger.error('Error occurred when read host_start.log: %s', err)
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
try:
|
||||
with open(dev_start_file_path) as f_obj:
|
||||
lines = f_obj.readlines()
|
||||
|
@ -1147,7 +1147,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|||
dev_cntvct = int(lines[2].strip().split(':')[1])
|
||||
except (IOError, OSError) as err:
|
||||
logger.error('Error occurred when read dev_start.log: %s', err)
|
||||
raise ProfilerIOException
|
||||
raise ProfilerIOException()
|
||||
|
||||
factor_ns_to_ms = 1e-6
|
||||
factor_ten_ns_to_ns = 10
|
||||
|
|
Loading…
Reference in New Issue