forked from mindspore-Ecosystem/mindspore
add data saver module
This commit is contained in:
parent
f20e68a879
commit
31e61f71eb
|
@ -126,7 +126,7 @@ CUptiResult CuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *strea
|
|||
}
|
||||
|
||||
// Thin wrapper that forwards to the function GetCUPTIFunc returns for the name "cuptiGetDeviceId".
// Returns whatever CUptiResult the looked-up function returns.
CUptiResult CuptiGetDeviceId(CUcontext context, uint32_t *deviceId) {
  // The symbol name must match this wrapper; a stale second declaration looked up "cuptiSubscribe" instead.
  // Function-local static: the lookup is performed once, on the first call.
  static auto func_ptr = reinterpret_cast<CuptiGetDeviceIdFunc>(GetCUPTIFunc("cuptiGetDeviceId"));
  return func_ptr(context, deviceId);
}
|
||||
} // namespace gpu
|
||||
|
|
|
@ -0,0 +1,223 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "profiler/device/gpu/data_saver.h"
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace gpu {
|
||||
|
||||
/**
 * Builds the detail record of one operator from its collected statistics.
 *
 * @param op_info    Statistics of the operator. It is dereferenced here, so it must not be null.
 * @param proportion Value stored as-is in proportion_ (DataSaver::ParseOpInfo passes the operator's share of the
 *                   total host cost time).
 */
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
    : op_info_(op_info), proportion_(proportion) {
  // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
  op_full_name_ = op_info->op_name;
  // When the name holds no '/', rfind yields npos and npos + 1 wraps to 0, i.e. the start of the name.
  auto op_type_begin_iter = op_full_name_.rfind('/') + 1;
  auto op_type_end_iter = op_full_name_.rfind('-');
  // op_type_ is the text between the last '/' and the last '-'; op_name_ keeps the '-op{node_id}' suffix.
  op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
  op_name_ = op_full_name_.substr(op_type_begin_iter);
  // Guard the division: an operator with a zero count keeps the default average of 0.
  if (op_info->op_count != 0) {
    op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
  }
}
|
||||
|
||||
/**
 * Creates the statistics record for a single event (count 1).
 *
 * @param data Event to wrap. It is dereferenced here, so it must not be null.
 */
ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
  // Launch dimensions are only filled in for kernel activities; otherwise both strings stay empty.
  if (basic_info_->activity_type == ActivityType::kKernel) {
    const auto &kernel = basic_info_->kernel_info;
    // The dimensions are wrapped in double quotes because the value itself contains commas.
    grid_dim_ = "\"" + std::to_string(kernel.grid_x) + ',' + std::to_string(kernel.grid_y) + ',' +
                std::to_string(kernel.grid_z) + "\"";
    block_dim_ = "\"" + std::to_string(kernel.block_x) + ',' + std::to_string(kernel.block_y) + ',' +
                 std::to_string(kernel.block_z) + "\"";
  }
  count_ = 1;
  // With a single sample, total, average, maximum and minimum are all the same value.
  total_duration_ = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit;
  avg_duration_ = total_duration_;
  max_duration_ = total_duration_;
  min_duration_ = total_duration_;
}
|
||||
|
||||
/**
 * Merges the statistics of `other` into this record.
 *
 * count_ and total_duration_ are summed; max_duration_ and min_duration_ become the extremes over both records.
 * avg_duration_ is not touched here (DataSaver::ParseEvent recomputes it after all events are merged).
 *
 * @param other Record to merge in; it may itself be an aggregate of several events.
 * @return Reference to this record.
 */
ActivityData &ActivityData::operator+=(const ActivityData &other) {
  count_ += other.count_;
  total_duration_ += other.total_duration_;
  // Update max and min independently, each against the matching field of `other`. Comparing against
  // other.total_duration_ is only right when `other` holds exactly one event.
  if (other.max_duration_ > max_duration_) {
    max_duration_ = other.max_duration_;
  }
  if (other.min_duration_ < min_duration_) {
    min_duration_ = other.min_duration_;
  }
  return *this;
}
|
||||
|
||||
void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
|
||||
op_detail_infos_.reserve(op_info_maps.size());
|
||||
float total_time_sum = GetTotalOpTime(op_info_maps);
|
||||
for (auto item : op_info_maps) {
|
||||
float proportion = item.second.op_host_cost_time / total_time_sum;
|
||||
auto op_info = std::make_shared<OpInfo>(item.second);
|
||||
OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
|
||||
op_detail_infos_.emplace_back(op_detail_info);
|
||||
AddOpDetailInfoForType(op_detail_info);
|
||||
}
|
||||
// update average time of op type
|
||||
for (auto &op_type : op_type_infos_) {
|
||||
// device_infos: <type_name, op_type_info>
|
||||
op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
|
||||
}
|
||||
MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
|
||||
MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
|
||||
}
|
||||
|
||||
void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
|
||||
// Construct OpType object according to op detail info
|
||||
OpType op_type = OpType{op_detail_info.op_type_, op_detail_info.op_info_->op_count,
|
||||
op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_};
|
||||
// Set the OpType into op_type_infos_ map
|
||||
std::string type_name = op_detail_info.op_type_;
|
||||
auto iter = op_type_infos_.find(type_name);
|
||||
if (iter == op_type_infos_.end()) {
|
||||
op_type_infos_.emplace(type_name, op_type);
|
||||
} else {
|
||||
iter->second += op_type;
|
||||
}
|
||||
}
|
||||
|
||||
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
|
||||
float sum = 0;
|
||||
sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
|
||||
[](float i, auto iter) { return i + iter.second.op_host_cost_time; });
|
||||
MS_LOG(DEBUG) << "The total op time is " << sum;
|
||||
return sum;
|
||||
}
|
||||
|
||||
void DataSaver::ParseEvent(const std::vector<Event> &events) {
|
||||
// Put Kernel activity events into activity_infos_
|
||||
for (const auto &event : events) {
|
||||
if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity ||
|
||||
event.activity_type != ActivityType::kKernel) {
|
||||
continue;
|
||||
}
|
||||
AddKernelEvent(event);
|
||||
}
|
||||
// update average time of kernel op cost
|
||||
for (auto &device_infos : activity_infos_) {
|
||||
// device_infos: <device_id, DeviceActivityInfos>
|
||||
for (auto &activity_info : device_infos.second) {
|
||||
// activity_info: <kernel_name, Activity>
|
||||
activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_;
|
||||
}
|
||||
MS_LOG(DEBUG) << "Get " << device_infos.second.size() << " activity items for device:" << device_infos.first;
|
||||
}
|
||||
}
|
||||
|
||||
void DataSaver::AddKernelEvent(const Event &event) {
|
||||
// Put kernel event to activity_infos according to device id
|
||||
uint32_t device_id = event.device_id;
|
||||
auto iter = activity_infos_.find(device_id);
|
||||
if (iter == activity_infos_.end()) {
|
||||
auto res_flag = activity_infos_.emplace(device_id, DeviceActivityInfos());
|
||||
AddKernelEventToDevice(event, &res_flag.first->second);
|
||||
} else {
|
||||
AddKernelEventToDevice(event, &iter->second);
|
||||
}
|
||||
}
|
||||
|
||||
void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) {
|
||||
// Combine kernel activity with same kernel name
|
||||
auto event_ptr = std::make_shared<Event>(event);
|
||||
ActivityData activity_data = ActivityData(event_ptr);
|
||||
std::string kernel_name = event.kernel_name;
|
||||
auto iter = device_activity_infos->find(kernel_name);
|
||||
if (iter == device_activity_infos->end()) {
|
||||
device_activity_infos->emplace(kernel_name, activity_data);
|
||||
} else {
|
||||
iter->second += activity_data;
|
||||
}
|
||||
}
|
||||
|
||||
void DataSaver::WriteFile(std::string out_path_dir) {
|
||||
if (out_path_dir.empty()) {
|
||||
MS_LOG(WARNING) << "Output directory. Ignore the writing data.";
|
||||
return;
|
||||
}
|
||||
if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) {
|
||||
MS_LOG(WARNING) << "No operation detail infos to write.";
|
||||
return;
|
||||
}
|
||||
// not support multi-device for operator info per process yet
|
||||
device_id_ = std::to_string(activity_infos_.begin()->first);
|
||||
WriteOpDetail(out_path_dir);
|
||||
WriteOpType(out_path_dir);
|
||||
WriteActivity(out_path_dir);
|
||||
}
|
||||
|
||||
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
|
||||
std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv";
|
||||
std::ofstream ofs(file_path);
|
||||
// check if the file is writable
|
||||
if (!ofs.is_open()) {
|
||||
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
|
||||
return;
|
||||
}
|
||||
// write op type info into file
|
||||
ofs << OpType().GetHeader() << std::endl;
|
||||
for (auto op_type_info : op_type_infos_) {
|
||||
ofs << op_type_info.second << std::endl;
|
||||
}
|
||||
ofs.close();
|
||||
MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
|
||||
}
|
||||
|
||||
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
|
||||
std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv";
|
||||
std::ofstream ofs(file_path);
|
||||
if (!ofs.is_open()) {
|
||||
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
|
||||
return;
|
||||
}
|
||||
// write op detail info into file
|
||||
ofs << OpDetailInfo().GetHeader() << std::endl;
|
||||
for (auto op_detail : op_detail_infos_) {
|
||||
ofs << op_detail << std::endl;
|
||||
}
|
||||
ofs.close();
|
||||
MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
|
||||
}
|
||||
|
||||
void DataSaver::WriteActivity(const std::string &saver_base_dir) {
|
||||
std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
|
||||
for (auto device_info : activity_infos_) {
|
||||
std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
|
||||
std::ofstream ofs(file_path);
|
||||
if (!ofs.is_open()) {
|
||||
MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
|
||||
return;
|
||||
}
|
||||
// write activity data into file
|
||||
ofs << ActivityData().GetHeader() << std::endl;
|
||||
for (auto activity_data : device_info.second) {
|
||||
ofs << activity_data.second << std::endl;
|
||||
}
|
||||
ofs.close();
|
||||
MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,153 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_DATA_SAVER_H
|
||||
#define MINDSPORE_DATA_SAVER_H
|
||||
#include <iostream>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace gpu {
|
||||
|
||||
struct OpDetailInfo {
|
||||
std::string op_type_;
|
||||
std::string op_name_;
|
||||
std::string op_full_name_;
|
||||
std::shared_ptr<OpInfo> op_info_{nullptr};
|
||||
float op_avg_time_{0};
|
||||
float proportion_{0};
|
||||
|
||||
OpDetailInfo() = default;
|
||||
|
||||
OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);
|
||||
|
||||
std::string GetHeader() const {
|
||||
return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion,"
|
||||
"cuda_activity_cost_time(us),cuda_activity_call_count";
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) {
|
||||
os << "Device," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ','
|
||||
<< event.op_info_->op_count << ',' << event.op_info_->op_host_cost_time << ',' << event.op_avg_time_ << ','
|
||||
<< event.proportion_ << ',' << event.op_info_->cupti_activity_time << ',' << event.op_info_->op_kernel_count;
|
||||
return os;
|
||||
}
|
||||
};
|
||||
|
||||
// Aggregated statistics of all operators sharing one op type; streaming it produces one row of the
// op type CSV file.
struct OpType {
  std::string op_type_;
  int count_{0};
  float total_time_{0};
  float avg_time_{0};
  float proportion_{0};

  // Column titles, in the same order operator<< writes the fields.
  std::string GetHeader() const {
    return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)";
  }

  // Writes the record as one comma-separated row (note: proportion comes before the average).
  friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
    os << event.op_type_ << ',';
    os << event.count_ << ',';
    os << event.total_time_ << ',';
    os << event.proportion_ << ',';
    os << event.avg_time_;
    return os;
  }

  // Accumulates count, total time and proportion of `other`; op_type_ and avg_time_ are left unchanged.
  OpType &operator+=(const OpType &other) {
    count_ += other.count_;
    total_time_ += other.total_time_;
    proportion_ += other.proportion_;
    return *this;
  }
};
|
||||
|
||||
struct ActivityData {
|
||||
std::shared_ptr<Event> basic_info_{nullptr};
|
||||
std::string block_dim_;
|
||||
std::string grid_dim_;
|
||||
int count_{0};
|
||||
float total_duration_{0};
|
||||
float avg_duration_{0};
|
||||
float max_duration_{0};
|
||||
float min_duration_{0};
|
||||
|
||||
ActivityData() = default;
|
||||
|
||||
explicit ActivityData(std::shared_ptr<Event> data);
|
||||
|
||||
std::string GetHeader() const {
|
||||
return "name,type,op_full_name,stream_id,block_dim,grid_dim,occurrences,"
|
||||
"total_duration(us),avg_duration(us),max_duration(us),min_duration(us)";
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const ActivityData &event) {
|
||||
os << "\"" << event.basic_info_->kernel_name << "\"," << event.basic_info_->kernel_type << ','
|
||||
<< event.basic_info_->op_name << ',' << event.basic_info_->stream_id << ',' << event.block_dim_ << ','
|
||||
<< event.grid_dim_ << ',' << event.count_ << ',' << event.total_duration_ << ',' << event.avg_duration_ << ','
|
||||
<< event.max_duration_ << ',' << event.min_duration_;
|
||||
return os;
|
||||
}
|
||||
|
||||
ActivityData &operator+=(const ActivityData &other);
|
||||
};
|
||||
|
||||
// <string key, OpInfo> -- NOTE(review): the key is presumably the op full name; confirm against the producer.
using OpInfoMap = std::unordered_map<std::string, OpInfo>;
|
||||
using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; // <kernel_name, ActivityData>
|
||||
using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, DeviceActivityInfos>
|
||||
using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_type, OpType>
|
||||
// One OpDetailInfo per entry of the OpInfoMap handed to DataSaver::ParseOpInfo.
using OpDetailInfos = std::vector<OpDetailInfo>;
|
||||
|
||||
class DataSaver {
|
||||
public:
|
||||
DataSaver() = default;
|
||||
|
||||
~DataSaver() = default;
|
||||
|
||||
DataSaver(const DataSaver &) = delete;
|
||||
|
||||
DataSaver &operator=(const DataSaver &) = delete;
|
||||
|
||||
void ParseOpInfo(const OpInfoMap &op_info_maps);
|
||||
|
||||
void ParseEvent(const std::vector<Event> &events);
|
||||
|
||||
void WriteFile(std::string out_path);
|
||||
|
||||
private:
|
||||
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
|
||||
|
||||
float GetTotalOpTime(const OpInfoMap &op_info_maps);
|
||||
|
||||
void AddKernelEvent(const Event &event);
|
||||
|
||||
void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos);
|
||||
|
||||
void WriteOpType(const std::string &saver_base_dir);
|
||||
|
||||
void WriteOpDetail(const std::string &saver_base_dir);
|
||||
|
||||
void WriteActivity(const std::string &saver_base_dir);
|
||||
|
||||
std::string device_id_;
|
||||
AllActivityInfos activity_infos_;
|
||||
OpTypeInfos op_type_infos_;
|
||||
OpDetailInfos op_detail_infos_;
|
||||
};
|
||||
} // namespace gpu
|
||||
} // namespace profiler
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_DATA_SAVER_H
|
|
@ -19,6 +19,7 @@
|
|||
#include <chrono>
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
#include "profiler/device/gpu/cupti_interface.h"
|
||||
#include "profiler/device/gpu/data_saver.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "pybind_api/api_register.h"
|
||||
|
||||
|
@ -478,7 +479,11 @@ void GPUProfiler::Stop() {
|
|||
void GPUProfiler::SaveProfileData() {
|
||||
if (profile_data_path_.empty()) {
|
||||
MS_LOG(WARNING) << "profile_data_path is empty, skip save profile data.";
|
||||
return;
|
||||
} else {
|
||||
DataSaver dataSaver;
|
||||
dataSaver.ParseOpInfo(op_info_map_);
|
||||
dataSaver.ParseEvent(events_);
|
||||
dataSaver.WriteFile(profile_data_path_);
|
||||
}
|
||||
op_info_map_.clear();
|
||||
op_name_map_.clear();
|
||||
|
|
|
@ -43,17 +43,21 @@ class MinddataParser:
|
|||
node_name, node_start, node_end, queue_size = "", 0, 0, 0
|
||||
if node_info:
|
||||
node_name = node_info[0].replace("Node:", "")
|
||||
if len(node_info) > 2:
|
||||
node_start = node_info[1].replace("Run start:", "")
|
||||
if node_start.isdigit():
|
||||
node_start = int(node_start)
|
||||
node_end = node_info[2].replace("Run end:", "")
|
||||
if node_end.isdigit():
|
||||
node_end = int(node_end)
|
||||
if len(node_info) > 3:
|
||||
|
||||
if len(node_info) > 3 and "queue" in node_info[1]:
|
||||
queue_size = node_info[1].replace("queue size:", "")
|
||||
queue_size = int(queue_size) if queue_size.isdigit() else queue_size
|
||||
node_start = node_info[2].replace("Run start:", "")
|
||||
node_start = int(node_start) if node_start.isdigit() else node_start
|
||||
node_end = node_info[3].replace("Run end:", "")
|
||||
node_end = int(node_end) if node_end.isdigit() else node_end
|
||||
elif len(node_info) > 3 and "Run" in node_info[1]:
|
||||
queue_size = node_info[3].replace("queue size:", "")
|
||||
if queue_size.isdigit():
|
||||
queue_size = int(queue_size)
|
||||
queue_size = int(queue_size) if queue_size.isdigit() else queue_size
|
||||
node_start = node_info[1].replace("Run start:", "")
|
||||
node_start = int(node_start) if node_start.isdigit() else node_start
|
||||
node_end = node_info[2].replace("Run end:", "")
|
||||
node_end = int(node_end) if node_end.isdigit() else node_end
|
||||
|
||||
one_step_list = [node_name, node_start, node_end, queue_size]
|
||||
result.append(one_step_list)
|
||||
|
|
|
@ -79,35 +79,42 @@ class Profiler:
|
|||
optypes_to_deal='', optypes_not_deal='Variable', job_id=""):
|
||||
# get device_id and device_target
|
||||
self._get_devid_and_devtarget()
|
||||
self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id)
|
||||
data_path = os.path.join(self._container_path, "data")
|
||||
if not os.path.exists(data_path):
|
||||
os.makedirs(data_path, exist_ok=True)
|
||||
self._output_path = validate_and_normalize_path(output_path)
|
||||
self._output_path = os.path.join(self._output_path, "profiler")
|
||||
if not os.path.exists(self._output_path):
|
||||
os.makedirs(self._output_path, exist_ok=True)
|
||||
|
||||
os.environ['PROFILING_MODE'] = 'true'
|
||||
os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace'
|
||||
os.environ['MINDDATA_PROFILING_DIR'] = self._output_path
|
||||
os.environ['DEVICE_ID'] = self._dev_id
|
||||
os.environ['AICPU_PROFILING_MODE'] = 'true'
|
||||
os.environ['PROFILING_DIR'] = str(self._container_path)
|
||||
if self._device_target and self._device_target == "GPU":
|
||||
from mindspore._c_expression import GPUProfiler
|
||||
self._gpu_profiler = GPUProfiler.get_instance()
|
||||
self._gpu_profiler.init(self._output_path)
|
||||
self._gpu_profiler.step_profiling_enable(True)
|
||||
elif self._device_target and (self._device_target == "Ascend" or self._device_target != "Davinci"):
|
||||
self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id)
|
||||
data_path = os.path.join(self._container_path, "data")
|
||||
if not os.path.exists(data_path):
|
||||
os.makedirs(data_path, exist_ok=True)
|
||||
|
||||
# use context interface to open profiling, for the new mindspore version(after 2020.5.21)
|
||||
context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace")
|
||||
os.environ['PROFILING_MODE'] = 'true'
|
||||
os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace'
|
||||
os.environ['MINDDATA_PROFILING_DIR'] = self._output_path
|
||||
os.environ['DEVICE_ID'] = self._dev_id
|
||||
os.environ['AICPU_PROFILING_MODE'] = 'true'
|
||||
os.environ['PROFILING_DIR'] = str(self._container_path)
|
||||
|
||||
self._subgraph = check_subgraph(subgraph)
|
||||
self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else []
|
||||
self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
|
||||
self._detail = check_bool(is_detail, 'is_detail')
|
||||
self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path')
|
||||
self._profiling_job_id = job_id
|
||||
# add job id env through user input later
|
||||
self._job_id_env = 0
|
||||
self._start_time = int(time.time() * 10000000)
|
||||
logger.info("Profiling: profiling start time: %d", self._start_time)
|
||||
# use context interface to open profiling, for the new mindspore version(after 2020.5.21)
|
||||
context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace")
|
||||
|
||||
self._subgraph = check_subgraph(subgraph)
|
||||
self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else []
|
||||
self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
|
||||
self._detail = check_bool(is_detail, 'is_detail')
|
||||
self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path')
|
||||
self._profiling_job_id = job_id
|
||||
# add job id env through user input later
|
||||
self._job_id_env = 0
|
||||
self._start_time = int(time.time() * 10000000)
|
||||
logger.info("Profiling: profiling start time: %d", self._start_time)
|
||||
|
||||
def analyse(self):
|
||||
"""
|
||||
|
@ -123,71 +130,74 @@ class Profiler:
|
|||
>>> model.train()
|
||||
>>> profiler.analyse()
|
||||
"""
|
||||
release()
|
||||
if self._device_target and self._device_target == "GPU":
|
||||
self._gpu_profiler.stop()
|
||||
elif self._device_target and (self._device_target == "Ascend" or self._device_target != "Davinci"):
|
||||
release()
|
||||
|
||||
job_id = self._get_profiling_job_id()
|
||||
logger.info("Profiling: job id is %s ", job_id)
|
||||
job_id = self._get_profiling_job_id()
|
||||
logger.info("Profiling: job id is %s ", job_id)
|
||||
|
||||
source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
|
||||
# parse hwts.log.data.45.dev file, and get task profiling data
|
||||
hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
|
||||
hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
|
||||
hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
|
||||
result = hwtslog_parser.execute()
|
||||
if not result:
|
||||
logger.error("Profiling: fail to parse hwts log file.")
|
||||
return
|
||||
source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
|
||||
# parse hwts.log.data.45.dev file, and get task profiling data
|
||||
hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
|
||||
hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
|
||||
hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
|
||||
result = hwtslog_parser.execute()
|
||||
if not result:
|
||||
logger.error("Profiling: fail to parse hwts log file.")
|
||||
return
|
||||
|
||||
# parse Framework file, and get the relation of op and tasks
|
||||
framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path)
|
||||
framework_parser.parse()
|
||||
op_task_dict = framework_parser.to_task_id_full_op_name_dict()
|
||||
if not op_task_dict:
|
||||
logger.error("Profiling: fail to parse framework files.")
|
||||
return
|
||||
# parse Framework file, and get the relation of op and tasks
|
||||
framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path)
|
||||
framework_parser.parse()
|
||||
op_task_dict = framework_parser.to_task_id_full_op_name_dict()
|
||||
if not op_task_dict:
|
||||
logger.error("Profiling: fail to parse framework files.")
|
||||
return
|
||||
|
||||
# get op compute time from hwts data and framework data, write output_op_compute_time.txt
|
||||
opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
|
||||
opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
|
||||
optime_parser = OPComputeTimeParser(
|
||||
hwts_output_filename, opcompute_output_filename,
|
||||
op_task_dict, self._output_path, self._dev_id
|
||||
)
|
||||
optime_parser.execute()
|
||||
# get op compute time from hwts data and framework data, write output_op_compute_time.txt
|
||||
opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
|
||||
opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
|
||||
optime_parser = OPComputeTimeParser(
|
||||
hwts_output_filename, opcompute_output_filename,
|
||||
op_task_dict, self._output_path, self._dev_id
|
||||
)
|
||||
optime_parser.execute()
|
||||
|
||||
# parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
|
||||
output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
|
||||
output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
|
||||
aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu)
|
||||
aicpu_data_parser.execute()
|
||||
# parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
|
||||
output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
|
||||
output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
|
||||
aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu)
|
||||
aicpu_data_parser.execute()
|
||||
|
||||
# Parsing minddata AICPU profiling
|
||||
MinddataParser.execute(source_path, self._output_path, self._dev_id)
|
||||
# Parsing minddata AICPU profiling
|
||||
MinddataParser.execute(source_path, self._output_path, self._dev_id)
|
||||
|
||||
# parse minddata pipeline operator and queue
|
||||
try:
|
||||
pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path)
|
||||
pipeline_parser.parse()
|
||||
except ProfilerException as err:
|
||||
logger.warning(err.message)
|
||||
# parse minddata pipeline operator and queue
|
||||
try:
|
||||
pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path)
|
||||
pipeline_parser.parse()
|
||||
except ProfilerException as err:
|
||||
logger.warning(err.message)
|
||||
|
||||
# analyse op compute time info
|
||||
try:
|
||||
self._analyser_op_info()
|
||||
except ProfilerException as err:
|
||||
logger.warning(err.message)
|
||||
# analyse op compute time info
|
||||
try:
|
||||
self._analyser_op_info()
|
||||
except ProfilerException as err:
|
||||
logger.warning(err.message)
|
||||
|
||||
# analyse step trace info
|
||||
try:
|
||||
self._analyse_step_trace(source_path, framework_parser)
|
||||
except ProfilerException as err:
|
||||
logger.warning(err.message)
|
||||
# analyse step trace info
|
||||
try:
|
||||
self._analyse_step_trace(source_path, framework_parser)
|
||||
except ProfilerException as err:
|
||||
logger.warning(err.message)
|
||||
|
||||
# analyse timeline info
|
||||
try:
|
||||
self._analyse_timeline(aicpu_data_parser, optime_parser)
|
||||
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
||||
logger.warning('Fail to write timeline data: %s', err)
|
||||
# analyse timeline info
|
||||
try:
|
||||
self._analyse_timeline(aicpu_data_parser, optime_parser)
|
||||
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
||||
logger.warning('Fail to write timeline data: %s', err)
|
||||
|
||||
def _analyse_step_trace(self, source_path, framework_parser):
|
||||
"""
|
||||
|
@ -416,12 +426,12 @@ class Profiler:
|
|||
dev_id = "0"
|
||||
logger.error("Fail to get DEVICE_ID, use 0 instead.")
|
||||
|
||||
if device_target and device_target != "Davinci" \
|
||||
and device_target != "Ascend":
|
||||
if device_target and device_target not in ["Davinci", "Ascend", "GPU"]:
|
||||
msg = "Profiling: unsupport backend: %s" % device_target
|
||||
raise RuntimeError(msg)
|
||||
|
||||
self._dev_id = dev_id
|
||||
self._device_target = device_target
|
||||
|
||||
@staticmethod
|
||||
def trainable_parameters(network):
|
||||
|
|
Loading…
Reference in New Issue