forked from mindspore-Ecosystem/mindspore
add gpu timeline
This commit is contained in:
parent
8a71db07c2
commit
e9add03ca8
|
@ -50,6 +50,7 @@ ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
|
|||
avg_duration_ = total_duration_;
|
||||
max_duration_ = total_duration_;
|
||||
min_duration_ = total_duration_;
|
||||
start_duration.emplace_back(StartDuration({basic_info_->start_time_stamp, total_duration_}));
|
||||
}
|
||||
|
||||
ActivityData &ActivityData::operator+=(const ActivityData &other) {
|
||||
|
@ -68,6 +69,7 @@ void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
|
|||
op_detail_infos_.reserve(op_info_maps.size());
|
||||
float total_time_sum = GetTotalOpTime(op_info_maps);
|
||||
for (auto item : op_info_maps) {
|
||||
op_timestamps_map_[item.first] = item.second.start_duration;
|
||||
float proportion = item.second.op_host_cost_time / total_time_sum;
|
||||
auto op_info = std::make_shared<OpInfo>(item.second);
|
||||
OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
|
||||
|
@ -147,6 +149,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *
|
|||
device_activity_infos->emplace(kernel_name, activity_data);
|
||||
} else {
|
||||
iter->second += activity_data;
|
||||
iter->second.start_duration.emplace_back(StartDuration({event.start_time_stamp, activity_data.total_duration_}));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -164,6 +167,7 @@ void DataSaver::WriteFile(std::string out_path_dir) {
|
|||
WriteOpDetail(out_path_dir);
|
||||
WriteOpType(out_path_dir);
|
||||
WriteActivity(out_path_dir);
|
||||
WriteOpTimestamp(out_path_dir);
|
||||
}
|
||||
|
||||
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
|
||||
|
@ -201,23 +205,58 @@ void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
|
|||
|
||||
void DataSaver::WriteActivity(const std::string &saver_base_dir) {
|
||||
std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
|
||||
std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_";
|
||||
for (auto device_info : activity_infos_) {
|
||||
// write activity result csv
|
||||
std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
|
||||
std::ofstream ofs(file_path);
|
||||
if (!ofs.is_open()) {
|
||||
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
|
||||
return;
|
||||
}
|
||||
// write activity timestamp txt
|
||||
std::string timestamp_file_path = timestamp_file_path_base + std::to_string(device_info.first) + ".txt";
|
||||
std::ofstream activity_timestamp_ofs(timestamp_file_path);
|
||||
if (!activity_timestamp_ofs.is_open()) {
|
||||
MS_LOG(WARNING) << "Open file '" << timestamp_file_path << "' failed!";
|
||||
return;
|
||||
}
|
||||
// write activity data into file
|
||||
ofs << ActivityData().GetHeader() << std::endl;
|
||||
for (auto activity_data : device_info.second) {
|
||||
ofs << activity_data.second << std::endl;
|
||||
for (auto start_duration : activity_data.second.start_duration) {
|
||||
activity_timestamp_ofs << activity_data.second.basic_info_->kernel_name << ";";
|
||||
activity_timestamp_ofs << activity_data.second.basic_info_->stream_id << ";";
|
||||
activity_timestamp_ofs << start_duration.start_timestamp << ";";
|
||||
activity_timestamp_ofs << start_duration.duration << std::endl;
|
||||
}
|
||||
}
|
||||
ofs.close();
|
||||
activity_timestamp_ofs.close();
|
||||
MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
|
||||
}
|
||||
}
|
||||
|
||||
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
|
||||
std::string file_path = saver_base_dir + "/op_execute_timestamp_" + device_id_ + ".txt";
|
||||
std::ofstream ofs(file_path);
|
||||
// check if the file is writable
|
||||
if (!ofs.is_open()) {
|
||||
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
|
||||
return;
|
||||
}
|
||||
// write op timestamp info into file
|
||||
for (const auto &op_timestamp_info : op_timestamps_map_) {
|
||||
ofs << op_timestamp_info.first << ";Ops;";
|
||||
for (auto start_end : op_timestamp_info.second) {
|
||||
ofs << start_end.start_timestamp << "," << start_end.duration << " ";
|
||||
}
|
||||
ofs << std::endl;
|
||||
}
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -83,6 +83,7 @@ struct ActivityData {
|
|||
float avg_duration_{0};
|
||||
float max_duration_{0};
|
||||
float min_duration_{0};
|
||||
std::vector<StartDuration> start_duration;
|
||||
|
||||
ActivityData() = default;
|
||||
|
||||
|
@ -109,6 +110,8 @@ using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; //
|
|||
using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, ActivityData>
|
||||
using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype>
|
||||
using OpDetailInfos = std::vector<OpDetailInfo>;
|
||||
// <op_full_name, StartDuration>
|
||||
using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>;
|
||||
|
||||
class DataSaver {
|
||||
public:
|
||||
|
@ -141,10 +144,13 @@ class DataSaver {
|
|||
|
||||
void WriteActivity(const std::string &saver_base_dir);
|
||||
|
||||
void WriteOpTimestamp(const std::string &saver_base_dir);
|
||||
|
||||
std::string device_id_;
|
||||
AllActivityInfos activity_infos_;
|
||||
OpTypeInfos op_type_infos_;
|
||||
OpDetailInfos op_detail_infos_;
|
||||
OpTimestampInfo op_timestamps_map_;
|
||||
};
|
||||
} // namespace gpu
|
||||
} // namespace profiler
|
||||
|
|
|
@ -390,12 +390,20 @@ void GPUProfiler::SetRunTimeData(const std::string &op_name, const float time_el
|
|||
}
|
||||
}
|
||||
|
||||
void GPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) {
|
||||
auto iter = op_info_map_.find(op_name);
|
||||
if (iter != op_info_map_.end()) {
|
||||
iter->second.start_duration.emplace_back(StartDuration({start, duration}));
|
||||
}
|
||||
}
|
||||
|
||||
void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) {
|
||||
if (sync_enable_flag_) {
|
||||
CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_start_), "cudaEventCreate op event start failed");
|
||||
CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_stop_), "cudaEventCreate op event stop failed");
|
||||
CHECK_CUDA_RET_WITH_ERROR(cudaEventRecord(op_event_start_, (CUstream)stream_),
|
||||
"cudaEventRecord op event start failed");
|
||||
op_host_time_start_ = GetHostTimeStamp();
|
||||
} else {
|
||||
op_host_time_start_ = GetHostTimeStamp();
|
||||
}
|
||||
|
@ -414,12 +422,14 @@ void GPUProfiler::OpDataProducerEnd() {
|
|||
CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_start_), "cudaEventDestroy op event start failed");
|
||||
CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_stop_), "cudaEventDestroy op event stop failed");
|
||||
op_time_elapsed = op_time_elapsed * kTimeUnit;
|
||||
op_host_time_stop_ = GetHostTimeStamp();
|
||||
} else {
|
||||
op_host_time_stop_ = GetHostTimeStamp();
|
||||
op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit;
|
||||
}
|
||||
MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed;
|
||||
SetRunTimeData(op_name_, op_time_elapsed);
|
||||
SetRunTimeData(op_name_, op_host_time_start_, op_time_elapsed);
|
||||
}
|
||||
|
||||
void GPUProfiler::StopCUPTI() {
|
||||
|
|
|
@ -84,6 +84,11 @@ struct Event {
|
|||
};
|
||||
};
|
||||
|
||||
struct StartDuration {
|
||||
uint64_t start_timestamp = 0l;
|
||||
float duration = 0l;
|
||||
};
|
||||
|
||||
struct OpInfo {
|
||||
std::string op_name;
|
||||
float cupti_api_call_time = 0l;
|
||||
|
@ -92,6 +97,7 @@ struct OpInfo {
|
|||
int op_kernel_api_count = 0;
|
||||
int op_kernel_count = 0;
|
||||
int op_count = 0;
|
||||
std::vector<StartDuration> start_duration;
|
||||
void *stream;
|
||||
};
|
||||
|
||||
|
@ -133,6 +139,7 @@ class GPUProfiler {
|
|||
void AddEvent(Event &&event);
|
||||
void SetRunTimeData(const std::string &op_name, void *stream);
|
||||
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
|
||||
void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);
|
||||
void FixOpNameByCorrelationId(Event *event);
|
||||
|
||||
static std::shared_ptr<GPUProfiler> profiler_inst_;
|
||||
|
|
Loading…
Reference in New Issue