add gpu timeline

This commit is contained in:
yuximiao 2020-09-09 15:00:52 +08:00
parent 8a71db07c2
commit e9add03ca8
4 changed files with 62 additions and 0 deletions

View File

@ -50,6 +50,7 @@ ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
avg_duration_ = total_duration_;
max_duration_ = total_duration_;
min_duration_ = total_duration_;
start_duration.emplace_back(StartDuration({basic_info_->start_time_stamp, total_duration_}));
}
ActivityData &ActivityData::operator+=(const ActivityData &other) {
@ -68,6 +69,7 @@ void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
op_detail_infos_.reserve(op_info_maps.size());
float total_time_sum = GetTotalOpTime(op_info_maps);
for (auto item : op_info_maps) {
op_timestamps_map_[item.first] = item.second.start_duration;
float proportion = item.second.op_host_cost_time / total_time_sum;
auto op_info = std::make_shared<OpInfo>(item.second);
OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
@ -147,6 +149,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *
device_activity_infos->emplace(kernel_name, activity_data);
} else {
iter->second += activity_data;
iter->second.start_duration.emplace_back(StartDuration({event.start_time_stamp, activity_data.total_duration_}));
}
}
@ -164,6 +167,7 @@ void DataSaver::WriteFile(std::string out_path_dir) {
WriteOpDetail(out_path_dir);
WriteOpType(out_path_dir);
WriteActivity(out_path_dir);
WriteOpTimestamp(out_path_dir);
}
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
@ -201,23 +205,58 @@ void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
void DataSaver::WriteActivity(const std::string &saver_base_dir) {
std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
std::string timestamp_file_path_base = saver_base_dir + "/activity_execute_timestamp_";
for (auto device_info : activity_infos_) {
// write activity result csv
std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
// write activity timestamp txt
std::string timestamp_file_path = timestamp_file_path_base + std::to_string(device_info.first) + ".txt";
std::ofstream activity_timestamp_ofs(timestamp_file_path);
if (!activity_timestamp_ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << timestamp_file_path << "' failed!";
return;
}
// write activity data into file
ofs << ActivityData().GetHeader() << std::endl;
for (auto activity_data : device_info.second) {
ofs << activity_data.second << std::endl;
for (auto start_duration : activity_data.second.start_duration) {
activity_timestamp_ofs << activity_data.second.basic_info_->kernel_name << ";";
activity_timestamp_ofs << activity_data.second.basic_info_->stream_id << ";";
activity_timestamp_ofs << start_duration.start_timestamp << ";";
activity_timestamp_ofs << start_duration.duration << std::endl;
}
}
ofs.close();
activity_timestamp_ofs.close();
MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
}
}
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
std::string file_path = saver_base_dir + "/op_execute_timestamp_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
if (!ofs.is_open()) {
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
return;
}
// write op timestamp info into file
for (const auto &op_timestamp_info : op_timestamps_map_) {
ofs << op_timestamp_info.first << ";Ops;";
for (auto start_end : op_timestamp_info.second) {
ofs << start_end.start_timestamp << "," << start_end.duration << " ";
}
ofs << std::endl;
}
ofs.close();
}
} // namespace gpu
} // namespace profiler
} // namespace mindspore

View File

@ -83,6 +83,7 @@ struct ActivityData {
float avg_duration_{0};
float max_duration_{0};
float min_duration_{0};
std::vector<StartDuration> start_duration;
ActivityData() = default;
@ -109,6 +110,8 @@ using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; //
using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, ActivityData>
using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_full_name, Optype>
using OpDetailInfos = std::vector<OpDetailInfo>;
// <op_full_name, StartDuration>
using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>;
class DataSaver {
public:
@ -141,10 +144,13 @@ class DataSaver {
void WriteActivity(const std::string &saver_base_dir);
void WriteOpTimestamp(const std::string &saver_base_dir);
std::string device_id_;
AllActivityInfos activity_infos_;
OpTypeInfos op_type_infos_;
OpDetailInfos op_detail_infos_;
OpTimestampInfo op_timestamps_map_;
};
} // namespace gpu
} // namespace profiler

View File

@ -390,12 +390,20 @@ void GPUProfiler::SetRunTimeData(const std::string &op_name, const float time_el
}
}
void GPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) {
auto iter = op_info_map_.find(op_name);
if (iter != op_info_map_.end()) {
iter->second.start_duration.emplace_back(StartDuration({start, duration}));
}
}
void GPUProfiler::OpDataProducerBegin(const std::string op_name, void *stream) {
if (sync_enable_flag_) {
CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_start_), "cudaEventCreate op event start failed");
CHECK_CUDA_RET_WITH_ERROR(cudaEventCreate(&op_event_stop_), "cudaEventCreate op event stop failed");
CHECK_CUDA_RET_WITH_ERROR(cudaEventRecord(op_event_start_, (CUstream)stream_),
"cudaEventRecord op event start failed");
op_host_time_start_ = GetHostTimeStamp();
} else {
op_host_time_start_ = GetHostTimeStamp();
}
@ -414,12 +422,14 @@ void GPUProfiler::OpDataProducerEnd() {
CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_start_), "cudaEventDestroy op event start failed");
CHECK_CUDA_RET_WITH_ERROR(cudaEventDestroy(op_event_stop_), "cudaEventDestroy op event stop failed");
op_time_elapsed = op_time_elapsed * kTimeUnit;
op_host_time_stop_ = GetHostTimeStamp();
} else {
op_host_time_stop_ = GetHostTimeStamp();
op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit;
}
MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed;
SetRunTimeData(op_name_, op_time_elapsed);
SetRunTimeData(op_name_, op_host_time_start_, op_time_elapsed);
}
void GPUProfiler::StopCUPTI() {

View File

@ -84,6 +84,11 @@ struct Event {
};
};
struct StartDuration {
uint64_t start_timestamp = 0l;
float duration = 0l;
};
struct OpInfo {
std::string op_name;
float cupti_api_call_time = 0l;
@ -92,6 +97,7 @@ struct OpInfo {
int op_kernel_api_count = 0;
int op_kernel_count = 0;
int op_count = 0;
std::vector<StartDuration> start_duration;
void *stream;
};
@ -133,6 +139,7 @@ class GPUProfiler {
void AddEvent(Event &&event);
void SetRunTimeData(const std::string &op_name, void *stream);
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);
void FixOpNameByCorrelationId(Event *event);
static std::shared_ptr<GPUProfiler> profiler_inst_;