!12503 Refactor the C++ code of the GPU profiler

From: @gzhcv
Reviewed-by: 
Signed-off-by:
This commit is contained in:
mindspore-ci-bot 2021-02-26 11:45:24 +08:00 committed by Gitee
commit ec439fdb97
15 changed files with 508 additions and 555 deletions

View File

@ -1,6 +1,6 @@
if(ENABLE_GPU)
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"device/gpu/*.cc" "device/cpu/*.cc")
"device/gpu/*.cc" "device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
@ -8,7 +8,7 @@ endif()
if(ENABLE_D)
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc")
"device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
@ -16,7 +16,8 @@ if(ENABLE_D)
endif()
if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU))
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/cpu/*.cc")
file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"device/cpu/*.cc" "device/profiling.cc" "device/data_saver.cc")
set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})

View File

@ -24,65 +24,7 @@
namespace mindspore {
namespace profiler {
namespace cpu {
// Builds the per-operator detail record from a raw profiling entry.
// op_info: shared profiling entry; its op_name is like 'xxx/xxx/{op_type}-op{node_id}'.
// proportion: share of the total op time attributed to this operator.
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
    : op_info_(op_info), proportion_(proportion) {
  op_full_name_ = op_info_->op_name;
  // rfind yields npos when there is no '/'; npos + 1 wraps to 0, i.e. the whole name is the last segment.
  const auto type_begin = op_full_name_.rfind('/') + 1;
  const auto type_end = op_full_name_.rfind('-');
  // Only a '-' located inside the last segment separates the op type from the node id.
  const bool has_suffix = type_end != std::string::npos && type_end >= type_begin;
  op_name_ = op_full_name_.substr(type_begin);
  op_type_ = has_suffix ? op_full_name_.substr(type_begin, type_end - type_begin) : op_name_;
  // An entry that was never counted must not put inf/NaN into the report.
  op_avg_time_ = op_info_->op_count > 0 ? op_info_->op_cost_time / op_info_->op_count : 0;
}
// Converts the raw per-operator map into the detail list, the per-type aggregates and the
// timestamp table that the Write* methods dump afterwards.
// op_info_maps: <op_full_name, OpInfo> entries collected by the profiler.
void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
  const float factor_percent = 100;
  op_detail_infos_.reserve(op_info_maps.size());
  const float total_time_sum = GetTotalOpTime(op_info_maps);
  // Iterate by reference: every entry carries a vector of timestamps that a by-value loop would copy.
  for (const auto &item : op_info_maps) {
    op_timestamps_map_[item.first] = item.second.start_duration;
    // Without any measured time the share is reported as 0 instead of NaN.
    const float proportion = total_time_sum > 0 ? item.second.op_cost_time / total_time_sum * factor_percent : 0;
    auto op_info = std::make_shared<OpInfo>(item.second);
    OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
    op_detail_infos_.emplace_back(op_detail_info);
    AddOpDetailInfoForType(op_detail_info);
  }
  // Update the average time of every op type; op_type is <type_name, op_type_info>.
  for (auto &op_type : op_type_infos_) {
    op_type.second.avg_time_ = op_type.second.count_ > 0 ? op_type.second.total_time_ / op_type.second.count_ : 0;
  }
  MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
  MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
}
void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
// Construct OpType object according to op detail info
OpType op_type = OpType{op_detail_info.op_type_,
op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_cost_time,
0,
op_detail_info.proportion_};
// Set the OpType into op_type_infos_ map
std::string type_name = op_detail_info.op_type_;
auto iter = op_type_infos_.find(type_name);
if (iter == op_type_infos_.end()) {
op_type_infos_.emplace(type_name, op_type);
} else {
iter->second += op_type;
}
}
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
float sum = 0;
sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
[](float i, auto iter) { return i + iter.second.op_cost_time; });
MS_LOG(DEBUG) << "The total op time is " << sum;
return sum;
}
void DataSaver::WriteFile(std::string out_path_dir) {
void CpuDataSaver::WriteFile(std::string out_path_dir) {
if (op_detail_infos_.empty() || op_type_infos_.empty()) {
MS_LOG(INFO) << "No cpu operation detail infos to write.";
return;
@ -91,84 +33,11 @@ void DataSaver::WriteFile(std::string out_path_dir) {
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
device_id_ = std::to_string(device_id);
op_side_ = "cpu";
WriteOpDetail(out_path_dir);
WriteOpType(out_path_dir);
WriteOpTimestamp(out_path_dir);
}
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/cpu_op_type_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op type info into file
ofs << OpType().GetHeader() << std::endl;
for (auto op_type_info : op_type_infos_) {
ofs << op_type_info.second << std::endl;
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
}
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/cpu_op_detail_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op detail info into file
ofs << OpDetailInfo().GetHeader() << std::endl;
for (auto op_detail : op_detail_infos_) {
ofs << op_detail << std::endl;
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
}
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/cpu_op_execute_timestamp_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op timestamp info into file
for (const auto &op_timestamp_info : op_timestamps_map_) {
ofs << op_timestamp_info.first << ";host_cpu_ops;";
for (auto start_end : op_timestamp_info.second) {
ofs << start_end.start_timestamp << "," << start_end.duration << " ";
}
ofs << std::endl;
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
}
// Restricts file_path to owner read/write; a failing chmod is only logged.
// The mode now matches the "rw" wording of the warning below.
void DataSaver::ChangeFileMode(const std::string &file_path) {
  if (chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR) == -1) {
    MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
  }
}
} // namespace cpu
} // namespace profiler
} // namespace mindspore

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CPU_DATA_SAVER_H
#define MINDSPORE_CPU_DATA_SAVER_H
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H
#define MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H
#include <iostream>
#include <algorithm>
#include <unordered_map>
@ -23,101 +23,24 @@
#include <string>
#include <memory>
#include "profiler/device/cpu/cpu_profiling.h"
#include "profiler/device/data_saver.h"
namespace mindspore {
namespace profiler {
namespace cpu {
struct OpDetailInfo {
std::string op_type_;
std::string op_name_;
std::string op_full_name_;
std::shared_ptr<OpInfo> op_info_{nullptr};
float op_avg_time_{0};
float proportion_{0};
OpDetailInfo() = default;
OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);
std::string GetHeader() const {
return "op_side,op_type,op_name,full_op_name,op_occurrences,compute_time(ms),"
"avg_execution_time(ms),total_proportion,subgraph,pid";
}
friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) {
os << "Host," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ','
<< event.op_info_->op_count << ',' << event.op_info_->op_cost_time << ',' << event.op_avg_time_ << ','
<< event.proportion_ << ",Default," << event.op_info_->pid;
return os;
}
};
// Aggregated statistics of all operators sharing one op type.
struct OpType {
  std::string op_type_;
  int count_{0};
  int step_{0};
  float total_time_{0};
  float avg_time_{0};
  float proportion_{0};

  // Column names matching the order emitted by operator<<.
  std::string GetHeader() const {
    return "op_type,total_called_times,called_times(per-step),"
           "total_compute_time,compute_time(ms per-step),percent";
  }

  // Streams the aggregate as one CSV row (no trailing newline).
  friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
    // step_ defaults to 0 and an integer division by zero is undefined behaviour (typically a crash),
    // so both per-step columns are reported as 0 when no step is recorded.
    const bool has_step = event.step_ != 0;
    const int count_per_step = has_step ? event.count_ / event.step_ : 0;
    const float time_per_step = has_step ? event.total_time_ / event.step_ : 0;
    os << event.op_type_ << ',' << event.count_ << ',' << count_per_step << ',' << event.total_time_ << ','
       << time_per_step << ',' << event.proportion_;
    return os;
  }

  // Accumulates count, total time and proportion; step_ and avg_time_ are left untouched.
  OpType &operator+=(const OpType &other) {
    this->count_ += other.count_;
    this->total_time_ += other.total_time_;
    this->proportion_ += other.proportion_;
    return *this;
  }
};
using OpInfoMap = std::unordered_map<std::string, OpInfo>;
using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype>
using OpDetailInfos = std::vector<OpDetailInfo>;
// <op_full_name, StartDuration>
using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>;
class DataSaver {
class CpuDataSaver : public DataSaver {
public:
DataSaver() = default;
CpuDataSaver() = default;
~DataSaver() = default;
~CpuDataSaver() = default;
DataSaver(const DataSaver &) = delete;
CpuDataSaver(const CpuDataSaver &) = delete;
DataSaver &operator=(const DataSaver &) = delete;
void ParseOpInfo(const OpInfoMap &op_info_maps);
CpuDataSaver &operator=(const CpuDataSaver &) = delete;
void WriteFile(std::string out_path);
private:
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
float GetTotalOpTime(const OpInfoMap &op_info_maps);
void WriteOpType(const std::string &saver_base_dir);
void WriteOpDetail(const std::string &saver_base_dir);
void WriteOpTimestamp(const std::string &saver_base_dir);
void ChangeFileMode(const std::string &file_path);
std::string device_id_;
OpTypeInfos op_type_infos_;
OpDetailInfos op_detail_infos_;
OpTimestampInfo op_timestamps_map_;
};
} // namespace cpu
} // namespace profiler
} // namespace mindspore
#endif // MINDSPORE_CPU_DATA_SAVER_H
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_CPU_DATA_SAVER_H

View File

@ -29,18 +29,6 @@ namespace profiler {
namespace cpu {
std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = nullptr;
// Returns the current monotonic clock reading in nanoseconds, or 0 when the clock cannot be read.
// CLOCK_MONOTONIC is used on Windows builds, CLOCK_MONOTONIC_RAW everywhere else.
uint64_t GetMonoTimeStamp() {
  // Zero-initialised so that a failing clock_gettime never leaves indeterminate fields behind.
  struct timespec ts = {};
#if defined(_WIN32) || defined(_WIN64)
  const int rc = clock_gettime(CLOCK_MONOTONIC, &ts);
#else
  const int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
#endif
  if (rc != 0) {
    return 0;
  }
  constexpr uint64_t kNSecondInSecond = 1000000000;
  return static_cast<uint64_t>(ts.tv_sec) * kNSecondInSecond + static_cast<uint64_t>(ts.tv_nsec);
}
std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() {
if (profiler_inst_ == nullptr) {
profiler_inst_ = std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler());
@ -50,7 +38,7 @@ std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() {
void CPUProfiler::Init(const std::string &profileDataPath = "") {
MS_LOG(INFO) << "Initialize CPU Profiling";
base_time_ = GetMonoTimeStamp();
base_time_ = GetHostMonoTimeStamp();
profile_data_path_ = profileDataPath;
MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_;
}
@ -75,34 +63,19 @@ void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint32_t pid)
pid_ = pid;
}
// Adds time_elapsed to the accumulated cost of op_name; operators missing from op_info_map_ are ignored.
void CPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) {
  const auto found = op_info_map_.find(op_name);
  if (found == op_info_map_.end()) {
    return;
  }
  // The value is accumulated exactly as passed in; no unit conversion happens here.
  found->second.op_cost_time += time_elapsed;
}
// Appends one <start, duration> interval to op_name; operators missing from op_info_map_ are ignored.
void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) {
  const auto found = op_info_map_.find(op_name);
  if (found == op_info_map_.end()) {
    return;
  }
  auto &intervals = found->second.start_duration;
  intervals.emplace_back(StartDuration({start, duration}));
}
void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) {
op_time_start_ = GetMonoTimeStamp();
op_time_mono_start_ = GetMonoTimeStamp();
op_time_start_ = GetHostMonoTimeStamp();
op_time_mono_start_ = GetHostMonoTimeStamp();
SetRunTimeData(op_name, pid);
}
void CPUProfiler::OpDataProducerEnd() {
float op_time_elapsed = 0;
op_time_stop_ = GetMonoTimeStamp();
op_time_stop_ = GetHostMonoTimeStamp();
op_time_elapsed = (op_time_stop_ - op_time_start_) / kTimeUnit;
MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed;
SetRunTimeData(op_name_, op_time_elapsed);
SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed);
Profiler::SetRunTimeData(op_name_, op_time_elapsed);
Profiler::SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed);
}
void CPUProfiler::Stop() {
@ -115,7 +88,7 @@ void CPUProfiler::SaveProfileData() {
if (profile_data_path_.empty()) {
MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
} else {
DataSaver dataSaver;
CpuDataSaver dataSaver;
dataSaver.ParseOpInfo(op_info_map_);
dataSaver.WriteFile(profile_data_path_);
}

View File

@ -14,73 +14,52 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CPU_PROFILING_H
#define MINDSPORE_CPU_PROFILING_H
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H
#define MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H
#include <algorithm>
#include <cstdio>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "profiler/device/profiling.h"
namespace mindspore {
namespace profiler {
namespace cpu {
// One recorded execution interval: where it started and how long it lasted.
struct StartDuration {
  uint64_t start_timestamp{0};
  float duration{0};
};
// Raw profiling data collected for one operator.
struct OpInfo {
  std::string op_name;                        // full operator name, used as the key of the op info map
  float op_cost_time = 0;                     // accumulated elapsed time
  int op_count = 0;                           // number of recorded executions
  std::vector<StartDuration> start_duration;  // one entry per recorded execution interval
  uint32_t pid = 0;                           // initialised so a default-constructed entry never holds garbage
};
const float kTimeUnit = 1000;
class CPUProfiler {
class CPUProfiler : public Profiler {
public:
static std::shared_ptr<CPUProfiler> GetInstance();
~CPUProfiler() = default;
CPUProfiler(const CPUProfiler &) = delete;
CPUProfiler &operator=(const CPUProfiler &) = delete;
void Init(const std::string &profileDataPath);
void Stop();
void StepProfilingEnable(const bool enable_flag);
bool GetEnableFlag() const { return enable_flag_; }
void Init(const std::string &profileDataPath) override;
void Stop() override;
void StepProfilingEnable(const bool enable_flag) override;
void OpDataProducerBegin(const std::string op_name, const uint32_t pid);
void OpDataProducerEnd();
std::string ProfileDataPath() const { return profile_data_path_; }
void OpDataProducerEnd() override;
private:
CPUProfiler() = default;
void ClearInst();
void SetRunTimeData(const std::string &op_name, const uint32_t pid);
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);
void SaveProfileData() override;
void ClearInst() override;
static std::shared_ptr<CPUProfiler> profiler_inst_;
bool enable_flag_ = false;
std::unordered_map<std::string, OpInfo> op_info_map_;
uint64_t base_time_;
std::string op_name_;
uint32_t pid_;
void SaveProfileData();
uint64_t op_time_start_;
uint64_t op_time_mono_start_;
uint64_t op_time_stop_;
std::string profile_data_path_;
};
} // namespace cpu
} // namespace profiler
} // namespace mindspore
#endif // MINDSPORE_CPU_PROFILING_H
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_CPU_PROFILING_H

View File

@ -0,0 +1,177 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "profiler/device/data_saver.h"
#include <fstream>
#include <numeric>
#include "sys/stat.h"
#include "utils/log_adapter.h"
#include "utils/ms_utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace profiler {
// Builds the per-operator detail record from a raw profiling entry.
// op_info: shared profiling entry; its op_name is like 'xxx/xxx/{op_type}-op{node_id}'.
// proportion: share of the total op time attributed to this operator.
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
    : op_info_(op_info), proportion_(proportion) {
  op_full_name_ = op_info_->op_name;
  // rfind yields npos when there is no '/'; npos + 1 wraps to 0, i.e. the whole name is the last segment.
  const auto type_begin = op_full_name_.rfind('/') + 1;
  const auto type_end = op_full_name_.rfind('-');
  // Only a '-' located inside the last segment separates the op type from the node id.
  const bool has_suffix = type_end != std::string::npos && type_end >= type_begin;
  op_name_ = op_full_name_.substr(type_begin);
  op_type_ = has_suffix ? op_full_name_.substr(type_begin, type_end - type_begin) : op_name_;
  // An entry that was never counted must not put inf/NaN into the report.
  op_avg_time_ = op_info_->op_count > 0 ? op_info_->op_host_cost_time / op_info_->op_count : 0;
}
// Converts the raw per-operator map into the detail list, the per-type aggregates and the
// timestamp table that the Write* methods dump afterwards.
// op_info_maps: <op_full_name, OpInfo> entries collected by the profiler.
void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
  op_detail_infos_.reserve(op_info_maps.size());
  const float total_time_sum = GetTotalOpTime(op_info_maps);
  // Iterate by reference: every entry carries a vector of timestamps that a by-value loop would copy.
  for (const auto &item : op_info_maps) {
    op_timestamps_map_[item.first] = item.second.start_duration;
    // Without any measured time the share is reported as 0 instead of NaN.
    const float proportion = total_time_sum > 0 ? item.second.op_host_cost_time / total_time_sum : 0;
    auto op_info = std::make_shared<OpInfo>(item.second);
    OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
    op_detail_infos_.emplace_back(op_detail_info);
    AddOpDetailInfoForType(op_detail_info);
  }
  // Update the average time of every op type; op_type is <type_name, op_type_info>.
  for (auto &op_type : op_type_infos_) {
    op_type.second.avg_time_ = op_type.second.count_ > 0 ? op_type.second.total_time_ / op_type.second.count_ : 0;
  }
  MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
  MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
}
void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
// Construct OpType object according to op detail info
OpType op_type = OpType{op_detail_info.op_type_,
op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_host_cost_time,
0,
op_detail_info.proportion_};
// Set the OpType into op_type_infos_ map
std::string type_name = op_detail_info.op_type_;
auto iter = op_type_infos_.find(type_name);
if (iter == op_type_infos_.end()) {
op_type_infos_.emplace(type_name, op_type);
} else {
iter->second += op_type;
}
}
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
float sum = 0;
sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
[](float i, auto iter) { return i + iter.second.op_host_cost_time; });
MS_LOG(DEBUG) << "The total op time is " << sum;
return sum;
}
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op type info into file
if (op_side_ == "cpu") {
ofs << OpType().GetCpuHeader() << std::endl;
for (auto op_type_info : op_type_infos_) {
op_type_info.second.OutputCpuOpTypeInfo(ofs);
}
}
if (op_side_ == "gpu") {
ofs << OpType().GetGpuHeader() << std::endl;
for (auto op_type_info : op_type_infos_) {
op_type_info.second.OutputGpuOpTypeInfo(ofs);
}
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
}
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op detail info into file
if (op_side_ == "cpu") {
ofs << OpDetailInfo().GetCpuHeader() << std::endl;
for (auto op_detail : op_detail_infos_) {
op_detail.OutputCpuOpDetailInfo(ofs);
}
}
if (op_side_ == "gpu") {
ofs << OpDetailInfo().GetGpuHeader() << std::endl;
for (auto op_detail : op_detail_infos_) {
op_detail.OutputGpuOpDetailInfo(ofs);
}
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
}
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
try {
// write op timestamp info into file
for (const auto &op_timestamp_info : op_timestamps_map_) {
if (op_side_ == "cpu") {
ofs << op_timestamp_info.first << ";HostCpuOps;";
} else {
ofs << op_timestamp_info.first << ";GpuOps;";
}
for (auto start_end : op_timestamp_info.second) {
ofs << start_end.start_timestamp << "," << start_end.duration << " ";
}
ofs << std::endl;
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Write " << file_path << "failed: " << e.what();
}
ofs.close();
ChangeFileMode(file_path);
}
// Restricts file_path to owner read/write; a failing chmod is only logged.
void DataSaver::ChangeFileMode(const std::string &file_path) {
  const int rc = chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR);
  if (rc != -1) {
    return;
  }
  MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
}
} // namespace profiler
} // namespace mindspore

View File

@ -0,0 +1,127 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H
#define MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H
#include <iostream>
#include <algorithm>
#include <unordered_map>
#include <vector>
#include <string>
#include <memory>
#include "profiler/device/profiling.h"
namespace mindspore {
namespace profiler {
// One row of the op detail report: names derived from the full op name plus the raw profiling entry.
// The Output* members dereference op_info_ without a null check, so they must only be called on
// records built through the two-argument constructor.
struct OpDetailInfo {
  std::string op_type_;
  std::string op_name_;
  std::string op_full_name_;
  std::shared_ptr<OpInfo> op_info_{nullptr};
  float op_avg_time_{0};
  float proportion_{0};

  OpDetailInfo() = default;
  OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);

  // Column names matching OutputCpuOpDetailInfo.
  std::string GetCpuHeader() const {
    return "op_side,op_type,op_name,full_op_name,op_occurrences,op_total_time(ms),"
           "op_avg_time(ms),total_proportion,subgraph,pid";
  }

  // Column names matching OutputGpuOpDetailInfo.
  std::string GetGpuHeader() const {
    return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion,"
           "cuda_activity_cost_time(us),cuda_activity_call_count";
  }

  // Writes the host-side CSV row including the line break. Read-only, hence const.
  void OutputCpuOpDetailInfo(std::ostream &os) const {
    os << "Host," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ','
       << op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ",Default," << op_info_->pid
       << '\n';
  }

  // Writes the device-side CSV row including the line break. Read-only, hence const.
  void OutputGpuOpDetailInfo(std::ostream &os) const {
    os << "Device," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ','
       << op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ','
       << op_info_->cupti_activity_time << ',' << op_info_->op_kernel_count << '\n';
  }
};
// Aggregated statistics of all operators sharing one op type.
struct OpType {
  std::string op_type_;
  int count_{0};
  int step_{0};
  float total_time_{0};
  float avg_time_{0};
  float proportion_{0};

  // Column names matching OutputCpuOpTypeInfo.
  std::string GetCpuHeader() const {
    return "op_type,type_occurrences,execution_frequency(per-step),"
           "total_compute_time,avg_time(ms),percent";
  }

  // Column names matching OutputGpuOpTypeInfo.
  std::string GetGpuHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }

  // Writes the host-side CSV row including the line break.
  void OutputCpuOpTypeInfo(std::ostream &os) const {
    // step_ and count_ both default to 0. An integer division by zero is undefined behaviour
    // (typically a crash) and the float one yields inf/NaN, so both columns fall back to 0.
    const int count_per_step = step_ != 0 ? count_ / step_ : 0;
    const float time_per_call = count_ != 0 ? total_time_ / count_ : 0;
    os << op_type_ << ',' << count_ << ',' << count_per_step << ',' << total_time_ << ',' << time_per_call << ','
       << proportion_ << '\n';
  }

  // Writes the device-side CSV row including the line break.
  void OutputGpuOpTypeInfo(std::ostream &os) const {
    os << op_type_ << ',' << count_ << ',' << total_time_ << ',' << proportion_ << ',' << avg_time_ << '\n';
  }

  // Accumulates count, total time and proportion; step_ and avg_time_ are left untouched.
  OpType &operator+=(const OpType &other) {
    this->count_ += other.count_;
    this->total_time_ += other.total_time_;
    this->proportion_ += other.proportion_;
    return *this;
  }
};
using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>; // <op_full_name, StartDuration>
using OpInfoMap = std::unordered_map<std::string, OpInfo>;
using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype>
using OpDetailInfos = std::vector<OpDetailInfo>;
// Shared base of the device-specific data savers: turns the profiler's op info map into
// per-operator and per-type records and provides the writers for the common report files.
class DataSaver {
public:
DataSaver() = default;
// Virtual so that derived savers can be destroyed through a DataSaver pointer.
virtual ~DataSaver() = default;
// Fills op_detail_infos_, op_type_infos_ and op_timestamps_map_ from op_info_maps.
void ParseOpInfo(const OpInfoMap &op_info_maps);
protected:
// Merges one operator record into the aggregate of its op type.
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
// Sums op_host_cost_time over all entries.
float GetTotalOpTime(const OpInfoMap &op_info_maps);
// Writers for the report files below saver_base_dir; file names are built from op_side_ and device_id_.
void WriteOpType(const std::string &saver_base_dir);
void WriteOpDetail(const std::string &saver_base_dir);
void WriteOpTimestamp(const std::string &saver_base_dir);
// Sets the permissions of a written file.
void ChangeFileMode(const std::string &file_path);
OpTypeInfos op_type_infos_;          // <op type name, aggregated statistics>
OpDetailInfos op_detail_infos_;      // one record per operator
OpTimestampInfo op_timestamps_map_;  // <op_full_name, recorded intervals>
// Selects file name prefix and row layout in the writers, which compare it against "cpu" and "gpu".
std::string op_side_;
// Used as file name suffix by the writers.
std::string device_id_;
};
} // namespace profiler
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_DATA_SAVER_H

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "profiler/device/gpu/data_saver.h"
#include "profiler/device/gpu/gpu_data_saver.h"
#include <fstream>
#include <numeric>
#include "sys/stat.h"
@ -23,17 +23,6 @@
namespace mindspore {
namespace profiler {
namespace gpu {
// Builds the per-operator detail record from a raw profiling entry.
// op_info: shared profiling entry; its op_name is like 'xxx/xxx/{op_type}-op{node_id}'.
// proportion: share of the total op time attributed to this operator.
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
    : op_info_(op_info), proportion_(proportion) {
  op_full_name_ = op_info_->op_name;
  // rfind yields npos when there is no '/'; npos + 1 wraps to 0, i.e. the whole name is the last segment.
  const auto type_begin = op_full_name_.rfind('/') + 1;
  const auto type_end = op_full_name_.rfind('-');
  // Only a '-' located inside the last segment separates the op type from the node id.
  const bool has_suffix = type_end != std::string::npos && type_end >= type_begin;
  op_name_ = op_full_name_.substr(type_begin);
  op_type_ = has_suffix ? op_full_name_.substr(type_begin, type_end - type_begin) : op_name_;
  // An entry that was never counted must not put inf/NaN into the report.
  op_avg_time_ = op_info_->op_count > 0 ? op_info_->op_host_cost_time / op_info_->op_count : 0;
}
ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
grid_dim_ = basic_info_->activity_type == ActivityType::kKernel
? "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' +
@ -65,49 +54,7 @@ ActivityData &ActivityData::operator+=(const ActivityData &other) {
return *this;
}
// Converts the raw per-operator map into the detail list, the per-type aggregates and the
// timestamp table.
// op_info_maps: <op_full_name, OpInfo> entries collected by the profiler.
void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
  op_detail_infos_.reserve(op_info_maps.size());
  const float total_time_sum = GetTotalOpTime(op_info_maps);
  // Iterate by reference: every entry carries a vector of timestamps that a by-value loop would copy.
  for (const auto &item : op_info_maps) {
    op_timestamps_map_[item.first] = item.second.start_duration;
    // Without any measured time the share is reported as 0 instead of NaN.
    const float proportion = total_time_sum > 0 ? item.second.op_host_cost_time / total_time_sum : 0;
    auto op_info = std::make_shared<OpInfo>(item.second);
    OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
    op_detail_infos_.emplace_back(op_detail_info);
    AddOpDetailInfoForType(op_detail_info);
  }
  // Update the average time of every op type; op_type is <type_name, op_type_info>.
  for (auto &op_type : op_type_infos_) {
    op_type.second.avg_time_ = op_type.second.count_ > 0 ? op_type.second.total_time_ / op_type.second.count_ : 0;
  }
  MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
  MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
}
void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
// Construct OpType object according to op detail info
OpType op_type = OpType{op_detail_info.op_type_, op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_};
// Set the OpType into op_type_infos_ map
std::string type_name = op_detail_info.op_type_;
auto iter = op_type_infos_.find(type_name);
if (iter == op_type_infos_.end()) {
op_type_infos_.emplace(type_name, op_type);
} else {
iter->second += op_type;
}
}
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
float sum = 0;
sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
[](float i, auto iter) { return i + iter.second.op_host_cost_time; });
MS_LOG(DEBUG) << "The total op time is " << sum;
return sum;
}
void DataSaver::ParseEvent(const std::vector<Event> &events) {
void GpuDataSaver::ParseEvent(const std::vector<Event> &events) {
// Put Kernel activity events into activity_infos_
for (const auto &event : events) {
if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity ||
@ -127,7 +74,7 @@ void DataSaver::ParseEvent(const std::vector<Event> &events) {
}
}
void DataSaver::AddKernelEvent(const Event &event) {
void GpuDataSaver::AddKernelEvent(const Event &event) {
// Put kernel event to activity_infos according to device id
uint32_t device_id = event.device_id;
auto iter = activity_infos_.find(device_id);
@ -139,7 +86,7 @@ void DataSaver::AddKernelEvent(const Event &event) {
}
}
void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) {
void GpuDataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) {
// Combine kernel activity with same kernel name
auto event_ptr = std::make_shared<Event>(event);
ActivityData activity_data = ActivityData(event_ptr);
@ -153,7 +100,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *
}
}
void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) {
void GpuDataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) {
if (out_path_dir.empty()) {
MS_LOG(WARNING) << "Output directory. Ignore the writing data.";
return;
@ -164,6 +111,7 @@ void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time)
}
// not support multi-device for operator info per process yet
device_id_ = std::to_string(activity_infos_.begin()->first);
op_side_ = "gpu";
WriteOpDetail(out_path_dir);
WriteOpType(out_path_dir);
WriteActivity(out_path_dir);
@ -172,42 +120,7 @@ void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time)
WriteStartTime(out_path_dir, start_time);
}
// Writes the per-type statistics to <saver_base_dir>/gpu_op_type_info_<device_id>.csv.
// A file that cannot be opened is reported with a warning and skipped.
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
  std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv";
  std::ofstream ofs(file_path);
  // check if the file is writable
  if (!ofs.is_open()) {
    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
    return;
  }
  // Header first, then one row per op type; entries are streamed by reference instead of being copied.
  ofs << OpType().GetHeader() << std::endl;
  for (const auto &op_type_info : op_type_infos_) {
    ofs << op_type_info.second << std::endl;
  }
  ofs.close();
  ChangeFileMode(file_path);
  MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
}
// Writes the per-operator statistics to <saver_base_dir>/gpu_op_detail_info_<device_id>.csv.
// A file that cannot be opened is reported with a warning and skipped.
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
  std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv";
  std::ofstream ofs(file_path);
  if (!ofs.is_open()) {
    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
    return;
  }
  // Header first, then one row per operator; records are streamed by reference instead of being copied.
  ofs << OpDetailInfo().GetHeader() << std::endl;
  for (const auto &op_detail : op_detail_infos_) {
    ofs << op_detail << std::endl;
  }
  ofs.close();
  ChangeFileMode(file_path);
  MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
}
void DataSaver::WriteActivity(const std::string &saver_base_dir) {
void GpuDataSaver::WriteActivity(const std::string &saver_base_dir) {
std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_";
for (auto device_info : activity_infos_) {
@ -244,27 +157,7 @@ void DataSaver::WriteActivity(const std::string &saver_base_dir) {
}
}
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/op_execute_timestamp_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
// write op timestamp info into file
for (const auto &op_timestamp_info : op_timestamps_map_) {
ofs << op_timestamp_info.first << ";Ops;";
for (auto start_end : op_timestamp_info.second) {
ofs << start_end.start_timestamp << "," << start_end.duration << " ";
}
ofs << std::endl;
}
ofs.close();
ChangeFileMode(file_path);
}
void DataSaver::WriteStepTrace(const std::string &saver_base_dir) {
void GpuDataSaver::WriteStepTrace(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/step_trace_profiling_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
@ -308,7 +201,7 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) {
MS_LOG(INFO) << "Write step trace infos into file: " << file_path;
}
void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) {
void GpuDataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) {
std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
@ -330,14 +223,7 @@ void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime
MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path;
}
// Store the step-trace op names: the by-value argument is copy-assigned into step_trace_op_name.
void DataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; }
// Restrict a written profiling file to owner read/write.
// A chmod failure is not fatal: a warning is logged and the file keeps its previous mode.
void DataSaver::ChangeFileMode(const std::string &file_path) {
  // Owner rw (not read-only) matches the warning text below; a file left read-only could not be
  // reopened for writing by a non-privileged process when the same output path is reused.
  if (chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR) == -1) {
    MS_LOG(WARNING) << "Modify file:" << file_path << " to rw fail.";
  }
}
// Store the step-trace op names: the by-value argument is copy-assigned into step_trace_op_name.
void GpuDataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; }
} // namespace gpu
} // namespace profiler
} // namespace mindspore

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_DATA_SAVER_H
#define MINDSPORE_DATA_SAVER_H
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H
#define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H
#include <iostream>
#include <algorithm>
#include <unordered_map>
@ -23,57 +23,10 @@
#include <string>
#include <memory>
#include "profiler/device/gpu/gpu_profiling.h"
#include "profiler/device/data_saver.h"
namespace mindspore {
namespace profiler {
namespace gpu {
struct OpDetailInfo {
std::string op_type_;
std::string op_name_;
std::string op_full_name_;
std::shared_ptr<OpInfo> op_info_{nullptr};
float op_avg_time_{0};
float proportion_{0};
OpDetailInfo() = default;
OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);
std::string GetHeader() const {
return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion,"
"cuda_activity_cost_time(us),cuda_activity_call_count";
}
friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) {
os << "Device," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ','
<< event.op_info_->op_count << ',' << event.op_info_->op_host_cost_time << ',' << event.op_avg_time_ << ','
<< event.proportion_ << ',' << event.op_info_->cupti_activity_time << ',' << event.op_info_->op_kernel_count;
return os;
}
};
// Aggregated statistics for one operator type; one row of the op-type CSV report.
struct OpType {
  std::string op_type_;
  int count_{0};
  float total_time_{0};
  float avg_time_{0};
  float proportion_{0};

  // Column names. Note the row order: proportion is written before the average time.
  std::string GetHeader() const {
    return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)";
  }

  // Stream one comma-separated row in the column order given by GetHeader().
  friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
    os << event.op_type_;
    os << ',' << event.count_;
    os << ',' << event.total_time_;
    os << ',' << event.proportion_;
    os << ',' << event.avg_time_;
    return os;
  }

  // Merge another record into this one. Only count, total time and proportion are summed;
  // op_type_ and avg_time_ are left untouched.
  OpType &operator+=(const OpType &other) {
    count_ += other.count_;
    total_time_ += other.total_time_;
    proportion_ += other.proportion_;
    return *this;
  }
};
struct ActivityData {
std::shared_ptr<Event> basic_info_{nullptr};
std::string block_dim_;
@ -105,25 +58,18 @@ struct ActivityData {
ActivityData &operator+=(const ActivityData &other);
};
// Container aliases used by the data saver.
using OpInfoMap = std::unordered_map<std::string, OpInfo>;
using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; // <string key, ActivityData>; NOTE(review): previously documented as device_id, but device ids are the uint32_t keys of AllActivityInfos - confirm what this key is
using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, DeviceActivityInfos>
using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, OpType>; NOTE(review): confirm the key is the full name and not the op type
using OpDetailInfos = std::vector<OpDetailInfo>;
// <op_full_name, list of StartDuration samples>
using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>;
class DataSaver {
class GpuDataSaver : public DataSaver {
public:
DataSaver() = default;
GpuDataSaver() = default;
~DataSaver() = default;
~GpuDataSaver() = default;
DataSaver(const DataSaver &) = delete;
GpuDataSaver(const GpuDataSaver &) = delete;
DataSaver &operator=(const DataSaver &) = delete;
void ParseOpInfo(const OpInfoMap &op_info_maps);
GpuDataSaver &operator=(const GpuDataSaver &) = delete;
void SetStepTraceOpName(ProfilingTraceInfo trace_op_name);
@ -132,37 +78,21 @@ class DataSaver {
void WriteFile(std::string out_path, const BaseTime &start_time);
private:
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
float GetTotalOpTime(const OpInfoMap &op_info_maps);
void AddKernelEvent(const Event &event);
void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos);
void WriteOpType(const std::string &saver_base_dir);
void WriteOpDetail(const std::string &saver_base_dir);
void WriteActivity(const std::string &saver_base_dir);
void WriteOpTimestamp(const std::string &saver_base_dir);
void WriteStepTrace(const std::string &saver_base_dir);
void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time);
void ChangeFileMode(const std::string &file_path);
std::string device_id_;
AllActivityInfos activity_infos_;
OpTypeInfos op_type_infos_;
OpDetailInfos op_detail_infos_;
OpTimestampInfo op_timestamps_map_;
ProfilingTraceInfo step_trace_op_name;
};
} // namespace gpu
} // namespace profiler
} // namespace mindspore
#endif // MINDSPORE_DATA_SAVER_H
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_DATA_SAVER_H

View File

@ -21,7 +21,7 @@
#include <chrono>
#include <cmath>
#include "profiler/device/gpu/cupti_interface.h"
#include "profiler/device/gpu/data_saver.h"
#include "profiler/device/gpu/gpu_data_saver.h"
#include "pybind_api/api_register.h"
#include "utils/log_adapter.h"
#include "utils/utils.h"
@ -92,14 +92,6 @@ uint64_t GetHostTimeStamp() {
return cur_time_stamp;
}
// Return the current CLOCK_MONOTONIC_RAW reading in nanoseconds.
// Returns 0 when the clock cannot be read.
uint64_t GetHostMonoTimeStamp() {
  struct timespec ts = {0, 0};
  // ts is only meaningful when clock_gettime succeeds; never convert it after a failed call.
  if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts) != 0) {
    return 0;
  }
  constexpr uint64_t kNSecondInSecond = 1000000000;
  // Cast explicitly so the arithmetic is done in uint64_t regardless of the platform's time_t/long widths.
  return static_cast<uint64_t>(ts.tv_sec) * kNSecondInSecond + static_cast<uint64_t>(ts.tv_nsec);
}
std::string GetKernelFunc(const char *name) {
char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr);
if (demangledName != nullptr) {
@ -415,21 +407,6 @@ void GPUProfiler::SetRunTimeData(const std::string &op_name, void *stream) {
stream_ = stream;
}
// Add the elapsed host time of one execution to the operator's accumulated op_host_cost_time.
// The value is added as given (no unit conversion is performed here).
// Operators that are not present in op_info_map_ are silently ignored.
void GPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) {
  auto entry = op_info_map_.find(op_name);
  if (entry == op_info_map_.end()) {
    return;
  }
  auto &record = entry->second;
  record.op_host_cost_time += time_elapsed;
}
// Record one (start, duration) sample for an operator by appending it to the operator's
// start_duration list. Operators that are not present in op_info_map_ are silently ignored.
void GPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) {
  auto entry = op_info_map_.find(op_name);
  if (entry == op_info_map_.end()) {
    return;
  }
  auto &samples = entry->second.start_duration;
  samples.emplace_back(StartDuration({start, duration}));
}
void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) {
if (sync_enable_flag_) {
CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_start_), "cudaEventCreate op event start failed");
@ -463,8 +440,8 @@ void GPUProfiler::OpDataProducerEnd() {
op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit;
}
MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed;
SetRunTimeData(op_name_, op_time_elapsed);
SetRunTimeData(op_name_, op_cupti_time_start_, op_time_elapsed);
Profiler::SetRunTimeData(op_name_, op_time_elapsed);
Profiler::SetRunTimeData(op_name_, op_cupti_time_start_, op_time_elapsed);
}
void GPUProfiler::StopCUPTI() {
@ -498,7 +475,7 @@ void GPUProfiler::SaveProfileData() {
if (profile_data_path_.empty()) {
MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
} else {
DataSaver dataSaver;
GpuDataSaver dataSaver;
dataSaver.SetStepTraceOpName(step_trace_op_name);
dataSaver.ParseOpInfo(op_info_map_);
dataSaver.ParseEvent(events_);

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_GPU_PROFILING_H
#define MINDSPORE_GPU_PROFILING_H
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H
#define MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H
#include <cuda.h>
#include <cupti.h>
#include <algorithm>
@ -27,6 +27,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include "profiler/device/profiling.h"
#include "profiler/device/gpu/gpu_profiling_utils.h"
namespace mindspore {
@ -86,23 +87,6 @@ struct Event {
};
};
// One execution sample of an operator: when it started and how long it took.
struct StartDuration {
  uint64_t start_timestamp = 0;
  float duration = 0.0f;
};

// Per-operator profiling record. Every member has an initializer so that a default-initialized
// OpInfo never carries indeterminate values.
struct OpInfo {
  std::string op_name;
  float cupti_api_call_time = 0.0f;
  float cupti_activity_time = 0.0f;
  // Accumulated by SetRunTimeData(op_name, time_elapsed).
  float op_host_cost_time = 0.0f;
  int op_kernel_api_count = 0;
  int op_kernel_count = 0;
  int op_count = 0;
  // One entry appended per SetRunTimeData(op_name, start, duration) call.
  std::vector<StartDuration> start_duration;
  // Previously left uninitialized; null until a stream is assigned.
  void *stream = nullptr;
};
struct BaseTime {
// nanosecond
uint64_t host_start_time = 0l;
@ -124,17 +108,17 @@ class ProfilingOp {
std::string op_name_;
};
class GPUProfiler {
class GPUProfiler : public Profiler {
public:
static std::shared_ptr<GPUProfiler> GetInstance();
~GPUProfiler() { StopCUPTI(); }
GPUProfiler(const GPUProfiler &) = delete;
GPUProfiler &operator=(const GPUProfiler &) = delete;
void Init(const std::string &profileDataPath);
void Stop();
void Init(const std::string &profileDataPath) override;
void Stop() override;
void StopCUPTI();
void StepProfilingEnable(const bool enable_flag);
void StepProfilingEnable(const bool enable_flag) override;
void SyncEnable(const bool enable_flag);
bool GetEnableFlag() const { return enable_flag_; }
bool GetSyncEnableFlag() const { return sync_enable_flag_; }
@ -143,7 +127,7 @@ class GPUProfiler {
void CUPTIAPI AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords);
void CUPTIAPI ProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize);
void OpDataProducerBegin(const std::string op_name, void *stream);
void OpDataProducerEnd();
void OpDataProducerEnd() override;
void ProcessEvents();
void RegisterProfilingOp(std::shared_ptr<ProfilingOp> node);
void SetStepTraceOpName(ProfilingTraceInfo trace_op_name);
@ -153,24 +137,21 @@ class GPUProfiler {
GPUProfiler() = default;
void OpsParser();
void EventLog(const Event &event);
void ClearInst();
void ClearInst() override;
void HandleActivityRecord(CUpti_Activity *record);
void AddEvent(Event &&event);
void SetRunTimeData(const std::string &op_name, void *stream);
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);
void FixOpNameByCorrelationId(Event *event);
static std::shared_ptr<GPUProfiler> profiler_inst_;
bool enable_flag_ = false;
bool sync_enable_flag_ = true;
std::unordered_map<std::string, OpInfo> op_info_map_;
std::unordered_map<uint32_t, std::string> op_name_map_;
std::vector<Event> events_;
BaseTime base_time_;
std::string op_name_;
void *stream_;
void SaveProfileData();
void SaveProfileData() override;
void SaveExtraProfileData();
std::mutex event_mutex_;
@ -198,4 +179,4 @@ class GPUProfiler {
} // namespace profiler
} // namespace mindspore
#endif // MINDSPORE_GPU_PROFILING_H
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_GPU_GPU_PROFILING_H

View File

@ -0,0 +1,56 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "profiler/device/profiling.h"
#include <time.h>
#include <cxxabi.h>
#include <cmath>
#include "profiler/device/cpu/cpu_data_saver.h"
#include "pybind_api/api_register.h"
#include "utils/log_adapter.h"
#include "utils/utils.h"
namespace mindspore {
namespace profiler {
// Read the host monotonic clock and return it in nanoseconds.
// Windows builds read CLOCK_MONOTONIC; all other builds read CLOCK_MONOTONIC_RAW.
// Returns 0 when the clock cannot be read.
uint64_t Profiler::GetHostMonoTimeStamp() {
  struct timespec ts = {0, 0};
#if defined(_WIN32) || defined(_WIN64)
  const int ret = clock_gettime(CLOCK_MONOTONIC, &ts);
#else
  const int ret = clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
#endif
  // ts is only meaningful when clock_gettime succeeds; never convert it after a failed call.
  if (ret != 0) {
    return 0;
  }
  constexpr uint64_t kNSecondInSecond = 1000000000;
  // Cast explicitly so the arithmetic is done in uint64_t regardless of the platform's time_t/long widths.
  return static_cast<uint64_t>(ts.tv_sec) * kNSecondInSecond + static_cast<uint64_t>(ts.tv_nsec);
}
// Add the elapsed host time of one execution to the operator's accumulated op_host_cost_time.
// The value is added as given (no unit conversion is performed here).
// Operators that are not present in op_info_map_ are silently ignored.
void Profiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) {
  auto entry = op_info_map_.find(op_name);
  if (entry == op_info_map_.end()) {
    return;
  }
  auto &record = entry->second;
  record.op_host_cost_time += time_elapsed;
}
// Record one (start, duration) sample for an operator by appending it to the operator's
// start_duration list. Operators that are not present in op_info_map_ are silently ignored.
void Profiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) {
  auto entry = op_info_map_.find(op_name);
  if (entry == op_info_map_.end()) {
    return;
  }
  auto &samples = entry->second.start_duration;
  samples.emplace_back(StartDuration({start, duration}));
}
} // namespace profiler
} // namespace mindspore

View File

@ -0,0 +1,74 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H
#define MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H
#include <algorithm>
#include <cstdio>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace mindspore {
namespace profiler {
// One execution sample of an operator: when it started and how long it took.
struct StartDuration {
  uint64_t start_timestamp = 0;
  float duration = 0.0f;
};

// Per-operator profiling record. Every member has an initializer so that a default-initialized
// OpInfo never carries indeterminate values.
struct OpInfo {
  std::string op_name;
  float cupti_api_call_time = 0.0f;
  float cupti_activity_time = 0.0f;
  // Accumulated by Profiler::SetRunTimeData(op_name, time_elapsed).
  float op_host_cost_time = 0.0f;
  int op_kernel_api_count = 0;
  int op_kernel_count = 0;
  int op_count = 0;
  // One entry appended per Profiler::SetRunTimeData(op_name, start, duration) call.
  std::vector<StartDuration> start_duration;
  // Previously left uninitialized; null until a stream is assigned.
  void *stream = nullptr;
  // Previously left uninitialized. NOTE(review): presumably a process id - confirm with the writers of this field.
  uint32_t pid = 0;
};
// Abstract base class for device profilers (GPUProfiler derives from it). It owns the
// per-operator records and the helpers that update them, and declares the hooks that each
// concrete profiler has to implement.
class Profiler {
public:
Profiler() = default;
virtual ~Profiler() = default;
// Hooks without a default implementation; every concrete profiler supplies its own.
virtual void Init(const std::string &profileDataPath) = 0;
virtual void Stop() = 0;
virtual void StepProfilingEnable(const bool enable_flag) = 0;
virtual void OpDataProducerEnd() = 0;
// Returns the current value of enable_flag_.
bool GetEnableFlag() const { return enable_flag_; }
// Returns a copy of profile_data_path_.
std::string ProfileDataPath() const { return profile_data_path_; }
protected:
// Adds time_elapsed to op_host_cost_time of the op_info_map_ entry named op_name; unknown names are ignored.
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
// Appends a {start, duration} sample to start_duration of the entry named op_name; unknown names are ignored.
void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);
// Returns a monotonic host clock reading converted to nanoseconds.
uint64_t GetHostMonoTimeStamp();
// Hooks without a default implementation; every concrete profiler supplies its own.
virtual void SaveProfileData() = 0;
virtual void ClearInst() = 0;
bool enable_flag_ = false;
std::string profile_data_path_;
// Per-operator records, looked up by the op_name passed to SetRunTimeData.
std::unordered_map<std::string, OpInfo> op_info_map_;
};
} // namespace profiler
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PROFILER_DEVICE_PROFILING_H

View File

@ -642,7 +642,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
"""Generate gpu Timeline data from file."""
_display_filename = 'gpu_timeline_display_{}.json'
_timeline_summary_filename = 'gpu_timeline_summary_{}.json'
_output_op_execute_time_file_path = "op_execute_timestamp_{}.txt"
_output_op_execute_time_file_path = "gpu_op_execute_timestamp_{}.txt"
_output_activity_execute_time_file_path = "activity_execute_timestamp_{}.txt"
_output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv"
_activity_keys_list = []

View File

@ -343,7 +343,7 @@ class BaseStepTraceParser:
row_data[FP_DURATION] += row_data[TAIL]
row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL]
csv_writer.writerow(row_data)
os.chmod(self._output_path, stat.S_IRUSR)
os.chmod(self._output_path, stat.S_IREAD | stat.S_IWRITE)
except (IOError, OSError) as err:
log.warning('Failed to save step trace raw info. %s', err)
raise ProfilerIOException
@ -387,7 +387,7 @@ class GpuStepTraceParser(BaseStepTraceParser):
try:
with open(output_path, 'w') as json_file:
json.dump(points, json_file)
os.chmod(output_path, stat.S_IRUSR)
os.chmod(output_path, stat.S_IREAD | stat.S_IWRITE)
except (IOError, OSError) as err:
log.warning('Failed to save point info. %s', err)
raise ProfilerIOException
@ -506,7 +506,7 @@ class AscendStepTraceParser(BaseStepTraceParser):
try:
with open(output_path, 'w') as json_file:
json.dump(points, json_file)
os.chmod(output_path, stat.S_IRUSR)
os.chmod(output_path, stat.S_IREAD | stat.S_IWRITE)
except (IOError, OSError) as err:
log.warning('Failed to save point info. %s', err)
raise ProfilerIOException