Fix profiler pclint & codex warnings
This commit is contained in:
parent
878cb6ac3b
commit
127e4d4068
|
@ -29,26 +29,6 @@ const int kIndent = 8;
|
|||
|
||||
AscendProfiler::AscendProfiler() : counter_(0) { Reset(); }
|
||||
|
||||
// Records one profiling event with a printf-style description.
//
// event_type: category stored on the recorded event.
// fmt, ...:   printf-style format string and its arguments; the formatted text is limited to
//             kEventDescMax - 1 characters by vsnprintf_s.
//
// The event is dropped (with an ERROR log) when fmt is null, when formatting fails, or when
// events_ has no free slot left for the next counter value.
void AscendProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {
  if (fmt == nullptr) {
    MS_LOG(ERROR) << "format string is nullptr";
    return;
  }

  char buf[kEventDescMax];
  va_list args;
  va_start(args, fmt);
  const int format_ret = vsnprintf_s(buf, kEventDescMax, kEventDescMax - 1, fmt, args);
  va_end(args);  // released once here, whatever the formatting result was
  if (format_ret == -1) {
    MS_LOG(ERROR) << "format failed:" << fmt;
    return;
  }

  // Claim a slot with a compare-exchange loop so counter_ can never move past events_.size();
  // a plain counter_++ would hand out an index outside the vector once the buffer is full.
  auto index = counter_.load();
  do {
    if (static_cast<size_t>(index) >= events_.size()) {
      MS_LOG(ERROR) << "event buffer is full, drop event:" << buf;
      return;
    }
  } while (!counter_.compare_exchange_weak(index, index + 1));

  auto &evt = events_[index];
  evt.timestamp = std::chrono::system_clock::now();
  evt.desc = std::string(buf);
  evt.event_type = event_type;
}
|
||||
|
||||
void AscendProfiler::Dump(std::ostream &output_stream) {
|
||||
MS_LOG(INFO) << "start dump async profiling info";
|
||||
if (events_.empty()) {
|
||||
|
@ -60,7 +40,7 @@ void AscendProfiler::Dump(std::ostream &output_stream) {
|
|||
std::vector<decltype(start)> prev_timestamps;
|
||||
prev_timestamps.resize(kMaxEventTypes, start);
|
||||
|
||||
for (int i = 0; i < counter_; ++i) {
|
||||
for (uint32_t i = 0; i < counter_; ++i) {
|
||||
auto &evt = events_[i];
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(evt.timestamp - start).count();
|
||||
auto &prev_ts = prev_timestamps[evt.event_type];
|
||||
|
|
|
@ -43,15 +43,13 @@ class AscendProfiler {
|
|||
return instance;
|
||||
}
|
||||
|
||||
void RecordEvent(EventType event_type, const char *fmt, ...);
|
||||
|
||||
void Reset();
|
||||
|
||||
void Dump(std::ostream &os);
|
||||
|
||||
private:
|
||||
std::vector<Event> events_;
|
||||
std::atomic_int counter_;
|
||||
std::atomic_uint32_t counter_;
|
||||
};
|
||||
} // namespace ascend
|
||||
} // namespace profiler
|
||||
|
|
|
@ -23,17 +23,6 @@ namespace profiler {
|
|||
namespace ascend {
|
||||
CallbackManager::CallbackManager(rtStream_t stream) : stream_(stream) {}
|
||||
|
||||
// Starts the background task that runs CallbackProcess().
//
// Returns kSuccess when the task was launched and its future is stored in ret_future_,
// kFail when the returned future is not valid.
Status CallbackManager::Init() {
  MS_LOG(INFO) << "CallbackManager init, Start to async process event";
  // Ask for std::launch::async explicitly: with the default policy the implementation is allowed
  // to defer the call until the future is waited on, so CallbackProcess() might never run in the
  // background. Capture `this` by name instead of a blanket [&] to make the dependency visible.
  ret_future_ = std::async(std::launch::async, [this] { return CallbackProcess(); });
  if (!ret_future_.valid()) {
    MS_LOG(ERROR) << "Failed to init callback manager.";
    return kFail;
  }

  return kSuccess;
}
|
||||
|
||||
Status CallbackManager::CallbackProcess() {
|
||||
std::pair<rtEvent_t, std::pair<rtCallback_t, const void *>> entry;
|
||||
while (true) {
|
||||
|
@ -50,15 +39,15 @@ Status CallbackManager::CallbackProcess() {
|
|||
auto rt_err = rtEventSynchronize(event);
|
||||
if (rt_err != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "rtEventSynchronize failed. ret:" << rt_err;
|
||||
auto ret = rtEventDestroy(event);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
rt_err = rtEventDestroy(event);
|
||||
if (rt_err != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "rtEventDestroy failed";
|
||||
}
|
||||
return kFail;
|
||||
}
|
||||
|
||||
auto ret = rtEventDestroy(event);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
rt_err = rtEventDestroy(event);
|
||||
if (rt_err != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "rtEventDestroy failed";
|
||||
}
|
||||
|
||||
|
@ -120,7 +109,7 @@ void CallbackManager::RtCallbackFunc(const void *data) {
|
|||
}
|
||||
|
||||
Status CallbackManager::RegisterCallback(const std::function<void()> &callback) {
|
||||
auto func = std::unique_ptr<std::function<void()>>(new (std::nothrow) std::function<void()>(callback));
|
||||
auto func = std::make_unique<std::function<void()>>(callback);
|
||||
if (func == nullptr) {
|
||||
MS_LOG(ERROR) << "callback is nullptr";
|
||||
return kInvalidParam;
|
||||
|
|
|
@ -40,8 +40,6 @@ class CallbackManager {
|
|||
|
||||
~CallbackManager() = default;
|
||||
|
||||
Status Init();
|
||||
|
||||
Status Destroy();
|
||||
|
||||
Status RegisterCallback(rtCallback_t callback, const void *user_data);
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
||||
constexpr char kOutputPath[] = "output";
|
||||
|
||||
std::shared_ptr<GraphMemory> MemoryProfiling::AddGraphMemoryNode(uint32_t graph_id) {
|
||||
|
@ -33,7 +32,7 @@ std::shared_ptr<GraphMemory> MemoryProfiling::AddGraphMemoryNode(uint32_t graph_
|
|||
return node;
|
||||
}
|
||||
|
||||
std::shared_ptr<GraphMemory> MemoryProfiling::GetGraphMemoryNode(uint32_t graph_id) {
|
||||
std::shared_ptr<GraphMemory> MemoryProfiling::GetGraphMemoryNode(uint32_t graph_id) const {
|
||||
auto node = graph_memory_.find(graph_id);
|
||||
if (node != graph_memory_.end()) {
|
||||
return node->second;
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
||||
class NodeMemory {
|
||||
public:
|
||||
NodeMemory() : node_name_(""), node_id_(0) {}
|
||||
|
@ -107,9 +106,8 @@ class MemoryProfiling {
|
|||
return instance;
|
||||
}
|
||||
|
||||
MemoryProto &GetMemProto() { return memory_proto_; }
|
||||
std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id);
|
||||
std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id);
|
||||
std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id) const;
|
||||
void SetDeviceMemSize(uint64_t size) { device_mem_size_ = size; }
|
||||
void MemoryToPB();
|
||||
void SaveMemoryProfiling();
|
||||
|
|
|
@ -20,7 +20,7 @@ package mindspore.profiler;
|
|||
|
||||
message MemoryProto {
|
||||
repeated GraphMemProto graph_mem = 1; // memory usage of multiple graphs
|
||||
int64 total_mem = 2; // total allocated device memory
|
||||
uint64 total_mem = 2; // total allocated device memory
|
||||
}
|
||||
|
||||
message GraphMemProto {
|
||||
|
@ -34,17 +34,17 @@ message GraphMemProto {
|
|||
|
||||
message NodeMemProto {
|
||||
string node_name = 1; // node name
|
||||
int64 node_id = 2; // node id with respect to the execution order
|
||||
repeated int64 input_tensor_id = 3; // input tensor id
|
||||
repeated int64 output_tensor_id = 4; // output tensor id
|
||||
repeated int64 workspace_tensor_id = 5; // workspace tensor id
|
||||
uint64 node_id = 2; // node id with respect to the execution order
|
||||
repeated uint64 input_tensor_id = 3; // input tensor id
|
||||
repeated uint64 output_tensor_id = 4; // output tensor id
|
||||
repeated uint64 workspace_tensor_id = 5; // workspace tensor id
|
||||
}
|
||||
|
||||
message TensorMemProto {
|
||||
int64 tensor_id = 1; // tensor id
|
||||
int64 size = 2; // aligned tensor size
|
||||
uint64 tensor_id = 1; // tensor id
|
||||
uint64 size = 2; // aligned tensor size
|
||||
string type = 3; // tensor type, e.g. Common, OutputOnly
|
||||
int64 life_start = 4; // the exe node id at which tensor memory allocated
|
||||
int64 life_end = 5; // the exe node id at which tensor memory deallocated
|
||||
uint64 life_start = 4; // the exe node id at which tensor memory allocated
|
||||
uint64 life_end = 5; // the exe node id at which tensor memory deallocated
|
||||
string life_long = 6; // see LifeLongType enum
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace cpu {
|
||||
void CpuDataSaver::WriteFile(std::string out_path_dir) {
|
||||
void CpuDataSaver::WriteFile(const std::string out_path_dir) {
|
||||
if (op_detail_infos_.empty() || op_type_infos_.empty()) {
|
||||
MS_LOG(INFO) << "No cpu operation detail infos to write.";
|
||||
return;
|
||||
|
|
|
@ -37,7 +37,7 @@ class CpuDataSaver : public DataSaver {
|
|||
|
||||
CpuDataSaver &operator=(const CpuDataSaver &) = delete;
|
||||
|
||||
void WriteFile(std::string out_path);
|
||||
void WriteFile(const std::string out_path);
|
||||
};
|
||||
} // namespace cpu
|
||||
} // namespace profiler
|
||||
|
|
|
@ -16,9 +16,9 @@
|
|||
|
||||
#include "profiler/device/cpu/cpu_profiling.h"
|
||||
|
||||
#include <time.h>
|
||||
#include <cxxabi.h>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include "profiler/device/cpu/cpu_data_saver.h"
|
||||
#include "pybind_api/api_register.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
@ -27,8 +27,7 @@
|
|||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace cpu {
|
||||
std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ =
|
||||
std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler());
|
||||
std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = std::make_shared<CPUProfiler>();
|
||||
|
||||
std::shared_ptr<CPUProfiler> &CPUProfiler::GetInstance() { return profiler_inst_; }
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ const float kNanosecondToMillisecond = 1000000;
|
|||
class CPUProfiler : public Profiler {
|
||||
public:
|
||||
static std::shared_ptr<CPUProfiler> &GetInstance();
|
||||
CPUProfiler() = default;
|
||||
~CPUProfiler() = default;
|
||||
CPUProfiler(const CPUProfiler &) = delete;
|
||||
CPUProfiler &operator=(const CPUProfiler &) = delete;
|
||||
|
@ -44,7 +45,6 @@ class CPUProfiler : public Profiler {
|
|||
void OpDataProducerEnd() override;
|
||||
|
||||
private:
|
||||
CPUProfiler() = default;
|
||||
void SetRunTimeData(const std::string &op_name, const uint32_t pid);
|
||||
void SaveProfileData() override;
|
||||
void ClearInst() override;
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
|
||||
OpDetailInfo::OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proportion)
|
||||
: op_info_(op_info), proportion_(proportion) {
|
||||
// op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
|
||||
op_full_name_ = op_info->op_name;
|
||||
|
@ -72,7 +72,7 @@ void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
|
|||
}
|
||||
}
|
||||
|
||||
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
|
||||
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) const {
|
||||
float sum = 0;
|
||||
sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
|
||||
[](float i, auto iter) { return i + iter.second.op_host_cost_time; });
|
||||
|
@ -80,7 +80,7 @@ float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
|
|||
return sum;
|
||||
}
|
||||
|
||||
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
|
||||
void DataSaver::WriteOpType(const std::string &saver_base_dir) const {
|
||||
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv";
|
||||
std::ofstream ofs(file_path);
|
||||
// check if the file is writable
|
||||
|
@ -110,7 +110,7 @@ void DataSaver::WriteOpType(const std::string &saver_base_dir) {
|
|||
MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
|
||||
}
|
||||
|
||||
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
|
||||
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) const {
|
||||
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv";
|
||||
std::ofstream ofs(file_path);
|
||||
if (!ofs.is_open()) {
|
||||
|
@ -139,7 +139,7 @@ void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
|
|||
MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
|
||||
}
|
||||
|
||||
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
|
||||
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) const {
|
||||
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt";
|
||||
std::ofstream ofs(file_path);
|
||||
// check if the file is writable
|
||||
|
@ -167,7 +167,7 @@ void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
|
|||
ChangeFileMode(file_path);
|
||||
}
|
||||
|
||||
void DataSaver::ChangeFileMode(const std::string &file_path) {
|
||||
void DataSaver::ChangeFileMode(const std::string &file_path) const {
|
||||
if (chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR) == -1) {
|
||||
MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
|
||||
return;
|
||||
|
|
|
@ -34,7 +34,7 @@ struct OpDetailInfo {
|
|||
float proportion_{0};
|
||||
|
||||
OpDetailInfo() = default;
|
||||
OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);
|
||||
OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proportion);
|
||||
|
||||
std::string GetCpuHeader() const {
|
||||
return "op_side,op_type,op_name,full_op_name,op_occurrences,op_total_time(ms),"
|
||||
|
@ -45,13 +45,13 @@ struct OpDetailInfo {
|
|||
"cuda_activity_cost_time(us),cuda_activity_call_count";
|
||||
}
|
||||
|
||||
void OutputCpuOpDetailInfo(std::ostream &os) {
|
||||
void OutputCpuOpDetailInfo(std::ostream &os) const {
|
||||
os << "Host," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ','
|
||||
<< op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ",Default," << op_info_->pid
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
void OutputGpuOpDetailInfo(std::ostream &os) {
|
||||
void OutputGpuOpDetailInfo(std::ostream &os) const {
|
||||
os << "Device," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ','
|
||||
<< op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ','
|
||||
<< op_info_->cupti_activity_time << ',' << op_info_->op_kernel_count << std::endl;
|
||||
|
@ -72,12 +72,12 @@ struct OpType {
|
|||
}
|
||||
std::string GetGpuHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }
|
||||
|
||||
void OutputCpuOpTypeInfo(std::ostream &os) {
|
||||
void OutputCpuOpTypeInfo(std::ostream &os) const {
|
||||
os << op_type_ << ',' << count_ << ',' << count_ / step_ << ',' << total_time_ << ',' << total_time_ / count_ << ','
|
||||
<< proportion_ << std::endl;
|
||||
}
|
||||
|
||||
void OutputGpuOpTypeInfo(std::ostream &os) {
|
||||
void OutputGpuOpTypeInfo(std::ostream &os) const {
|
||||
os << op_type_ << ',' << count_ << ',' << total_time_ << ',' << proportion_ << ',' << avg_time_ << std::endl;
|
||||
}
|
||||
|
||||
|
@ -105,15 +105,15 @@ class DataSaver {
|
|||
protected:
|
||||
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
|
||||
|
||||
float GetTotalOpTime(const OpInfoMap &op_info_maps);
|
||||
float GetTotalOpTime(const OpInfoMap &op_info_maps) const;
|
||||
|
||||
void WriteOpType(const std::string &saver_base_dir);
|
||||
void WriteOpType(const std::string &saver_base_dir) const;
|
||||
|
||||
void WriteOpDetail(const std::string &saver_base_dir);
|
||||
void WriteOpDetail(const std::string &saver_base_dir) const;
|
||||
|
||||
void WriteOpTimestamp(const std::string &saver_base_dir);
|
||||
void WriteOpTimestamp(const std::string &saver_base_dir) const;
|
||||
|
||||
void ChangeFileMode(const std::string &file_path);
|
||||
void ChangeFileMode(const std::string &file_path) const;
|
||||
|
||||
OpTypeInfos op_type_infos_;
|
||||
OpDetailInfos op_detail_infos_;
|
||||
|
|
|
@ -43,23 +43,23 @@ inline void *GetCUPTIFunc(const char *name) {
|
|||
return func;
|
||||
}
|
||||
|
||||
typedef CUptiResult (*CuptiSubscribeFunc)(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback,
|
||||
void *userdata);
|
||||
typedef CUptiResult (*CuptiEnableDomainFunc)(uint32_t enable, CUpti_SubscriberHandle subscriber,
|
||||
CUpti_CallbackDomain domain);
|
||||
typedef CUptiResult (*CuptiActivityEnableFunc)(CUpti_ActivityKind kind);
|
||||
typedef CUptiResult (*CuptiActivityRegisterCallbacksFunc)(CUpti_BuffersCallbackRequestFunc funcBufferRequested,
|
||||
CUpti_BuffersCallbackCompleteFunc funcBufferCompleted);
|
||||
typedef CUptiResult (*CuptiUnsubscribeFunc)(CUpti_SubscriberHandle subscriber);
|
||||
typedef CUptiResult (*CuptiActivityFlushAllFunc)(uint32_t flag);
|
||||
typedef CUptiResult (*CuptiActivityDisableFunc)(CUpti_ActivityKind kind);
|
||||
typedef CUptiResult (*CuptiActivityGetNextRecordFunc)(uint8_t *buffer, size_t validBufferSizeBytes,
|
||||
CUpti_Activity **record);
|
||||
typedef CUptiResult (*CuptiActivityGetNumDroppedRecordsFunc)(CUcontext context, uint32_t streamId, size_t *dropped);
|
||||
typedef CUptiResult (*CuptiGetTimestampFunc)(uint64_t *timestamp);
|
||||
typedef CUptiResult (*CuptiGetResultStringFunc)(CUptiResult result, const char **str);
|
||||
typedef CUptiResult (*CuptiGetStreamIdFunc)(CUcontext context, CUstream stream, uint32_t *streamId);
|
||||
typedef CUptiResult (*CuptiGetDeviceIdFunc)(CUcontext context, uint32_t *deviceId);
|
||||
using CuptiSubscribeFunc = CUptiResult (*)(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback,
|
||||
void *userdata);
|
||||
using CuptiEnableDomainFunc = CUptiResult (*)(uint32_t enable, CUpti_SubscriberHandle subscriber,
|
||||
CUpti_CallbackDomain domain);
|
||||
using CuptiActivityEnableFunc = CUptiResult (*)(CUpti_ActivityKind kind);
|
||||
using CuptiActivityRegisterCallbacksFunc = CUptiResult (*)(CUpti_BuffersCallbackRequestFunc funcBufferRequested,
|
||||
CUpti_BuffersCallbackCompleteFunc funcBufferCompleted);
|
||||
using CuptiUnsubscribeFunc = CUptiResult (*)(CUpti_SubscriberHandle subscriber);
|
||||
using CuptiActivityFlushAllFunc = CUptiResult (*)(uint32_t flag);
|
||||
using CuptiActivityDisableFunc = CUptiResult (*)(CUpti_ActivityKind kind);
|
||||
using CuptiActivityGetNextRecordFunc = CUptiResult (*)(uint8_t *buffer, size_t validBufferSizeBytes,
|
||||
CUpti_Activity **record);
|
||||
using CuptiActivityGetNumDroppedRecordsFunc = CUptiResult (*)(CUcontext context, uint32_t streamId, size_t *dropped);
|
||||
using CuptiGetTimestampFunc = CUptiResult (*)(uint64_t *timestamp);
|
||||
using CuptiGetResultStringFunc = CUptiResult (*)(CUptiResult result, const char **str);
|
||||
using CuptiGetStreamIdFunc = CUptiResult (*)(CUcontext context, CUstream stream, uint32_t *streamId);
|
||||
using CuptiGetDeviceIdFunc = CUptiResult (*)(CUcontext context, uint32_t *deviceId);
|
||||
|
||||
CUptiResult CuptiSubscribe(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback, void *userdata) {
|
||||
static auto func_ptr = reinterpret_cast<CuptiSubscribeFunc>(GetCUPTIFunc("cuptiSubscribe"));
|
||||
|
|
|
@ -16,10 +16,10 @@
|
|||
|
||||
#include "profiler/device/gpu/gpu_profiling.h"
|
||||
|
||||
#include <time.h>
|
||||
#include <cxxabi.h>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include "profiler/device/gpu/cupti_interface.h"
|
||||
#include "profiler/device/gpu/gpu_data_saver.h"
|
||||
#include "pybind_api/api_register.h"
|
||||
|
@ -29,29 +29,29 @@
|
|||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace gpu {
|
||||
#define BUF_SIZE (32 * 1024)
|
||||
#define ALIGN_SIZE (8)
|
||||
#define CHECK_CUPTI_RET_WITH_ERROR(expression, message) \
|
||||
if (expression != CUPTI_SUCCESS) { \
|
||||
const char *errstr; \
|
||||
CuptiGetResultString(expression, &errstr); \
|
||||
MS_LOG(ERROR) << "CUPTI Error:" << errstr << " function:" << message; \
|
||||
const size_t BUF_SIZE = 32 * 1024;
|
||||
const size_t ALIGN_SIZE = 8;
|
||||
// Evaluates a CUPTI call exactly once and logs an ERROR carrying the CUPTI result string when the
// call does not return CUPTI_SUCCESS. The result is cached so a failing call is not executed a
// second time just to fetch its error text. Wrapped in do { } while (0) so the macro behaves as a
// single statement (safe inside an unbraced if/else).
#define CHECK_CUPTI_RET_WITH_ERROR(expression, message)                                       \
  do {                                                                                        \
    const auto cupti_ret_ = (expression);                                                     \
    if (cupti_ret_ != CUPTI_SUCCESS) {                                                        \
      const char *errstr = nullptr;                                                           \
      CuptiGetResultString(cupti_ret_, &errstr);                                              \
      MS_LOG(ERROR) << "CUPTI Error:" << (errstr == nullptr ? "unknown" : errstr)             \
                    << " function:" << (message);                                             \
    }                                                                                         \
  } while (0)
|
||||
|
||||
#define CHECK_CUPTI_RET_WITH_EXCEPT(expression, message) \
|
||||
if (expression != CUPTI_SUCCESS) { \
|
||||
const char *errstr; \
|
||||
CuptiGetResultString(expression, &errstr); \
|
||||
MS_LOG(EXCEPTION) << "CUPTI Error:" << errstr << " function:" << message; \
|
||||
}
|
||||
#define CHECK_CUDA_RET_WITH_ERROR(expression, message) \
|
||||
{ \
|
||||
cudaError_t status = (expression); \
|
||||
if (status != cudaSuccess) { \
|
||||
MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " \
|
||||
<< cudaGetErrorString(status); \
|
||||
} \
|
||||
// Evaluates a CUPTI call exactly once and reports through MS_LOG(EXCEPTION), with the CUPTI result
// string, when the call does not return CUPTI_SUCCESS. The result is cached so a failing call is
// not executed a second time just to fetch its error text. Wrapped in do { } while (0) so the
// macro behaves as a single statement (safe inside an unbraced if/else).
#define CHECK_CUPTI_RET_WITH_EXCEPT(expression, message)                                      \
  do {                                                                                        \
    const auto cupti_ret_ = (expression);                                                     \
    if (cupti_ret_ != CUPTI_SUCCESS) {                                                        \
      const char *errstr = nullptr;                                                           \
      CuptiGetResultString(cupti_ret_, &errstr);                                              \
      MS_LOG(EXCEPTION) << "CUPTI Error:" << (errstr == nullptr ? "unknown" : errstr)         \
                        << " function:" << (message);                                         \
    }                                                                                         \
  } while (0)
|
||||
// Runs a CUDA runtime call once and, when it does not return cudaSuccess, logs the caller's
// message together with the numeric error code and the cudaGetErrorString() text.
#define CHECK_CUDA_RET_WITH_ERROR(expression, message)                                        \
  do {                                                                                        \
    const cudaError_t cuda_ret_ = (expression);                                               \
    if (cuda_ret_ == cudaSuccess) {                                                           \
      break;                                                                                  \
    }                                                                                         \
    MS_LOG(ERROR) << "CUDA Error: " << (message) << " | Error Number: " << cuda_ret_ << " "   \
                  << cudaGetErrorString(cuda_ret_);                                           \
  } while (0)
|
||||
#define PROFILER_ERROR_IF_NULLPTR(ptr) \
|
||||
do { \
|
||||
if ((ptr) == nullptr) { \
|
||||
|
@ -60,8 +60,7 @@ namespace gpu {
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
std::shared_ptr<GPUProfiler> GPUProfiler::profiler_inst_ =
|
||||
std::shared_ptr<GPUProfiler>(new (std::nothrow) GPUProfiler());
|
||||
std::shared_ptr<GPUProfiler> GPUProfiler::profiler_inst_ = std::make_shared<GPUProfiler>();
|
||||
|
||||
int32_t GetThreadID() {
|
||||
uint32_t thread_id = static_cast<uint32_t>(pthread_self());
|
||||
|
@ -114,6 +113,8 @@ bool IsMemcpyAsyncEvent(CUpti_CallbackId cb_id) {
|
|||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2:
|
||||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -134,6 +135,8 @@ bool IsMemcpySyncEvent(CUpti_CallbackId cb_id) {
|
|||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2:
|
||||
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -111,6 +111,7 @@ class ProfilingOp {
|
|||
class GPUProfiler : public Profiler {
|
||||
public:
|
||||
static std::shared_ptr<GPUProfiler> &GetInstance();
|
||||
GPUProfiler() = default;
|
||||
~GPUProfiler() { StopCUPTI(); }
|
||||
GPUProfiler(const GPUProfiler &) = delete;
|
||||
GPUProfiler &operator=(const GPUProfiler &) = delete;
|
||||
|
@ -134,7 +135,6 @@ class GPUProfiler : public Profiler {
|
|||
std::string ProfileDataPath() const { return profile_data_path_; }
|
||||
|
||||
private:
|
||||
GPUProfiler() = default;
|
||||
void OpsParser();
|
||||
void EventLog(const Event &event);
|
||||
void ClearInst() override;
|
||||
|
|
|
@ -16,9 +16,9 @@
|
|||
|
||||
#include "profiler/device/profiling.h"
|
||||
|
||||
#include <time.h>
|
||||
#include <cxxabi.h>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include "profiler/device/cpu/cpu_data_saver.h"
|
||||
#include "pybind_api/api_register.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
@ -26,7 +26,7 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
uint64_t Profiler::GetHostMonoTimeStamp() {
|
||||
uint64_t Profiler::GetHostMonoTimeStamp() const {
|
||||
struct timespec ts;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
|
|
|
@ -61,7 +61,7 @@ class Profiler {
|
|||
protected:
|
||||
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
|
||||
void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);
|
||||
uint64_t GetHostMonoTimeStamp();
|
||||
uint64_t GetHostMonoTimeStamp() const;
|
||||
virtual void SaveProfileData() = 0;
|
||||
virtual void ClearInst() = 0;
|
||||
bool enable_flag_ = false;
|
||||
|
|
|
@ -654,14 +654,6 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
mindspore::RDR::RecordGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size());
|
||||
size_t id = 0;
|
||||
#endif
|
||||
auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(profiler_inst);
|
||||
|
||||
if (profiler_inst->GetEnableFlag() && profiler::gpu::ProfilingUtils::IsFirstStep(graph->graph_id())) {
|
||||
profiler::gpu::ProfilingTraceInfo profiling_trace =
|
||||
profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph));
|
||||
profiler_inst->SetStepTraceOpName(profiling_trace);
|
||||
}
|
||||
CNodePtr last_kernel = GetLastKernel(graph);
|
||||
for (const auto &kernel : kernels) {
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
|
||||
|
@ -700,22 +692,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
mindspore::RDR::UpdateGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++);
|
||||
#endif
|
||||
if (!mock) {
|
||||
if (!profiling) {
|
||||
if (profiler_inst->GetEnableFlag()) {
|
||||
profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), stream_);
|
||||
}
|
||||
if (!kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_)) {
|
||||
MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope();
|
||||
}
|
||||
if (profiler_inst->GetEnableFlag()) {
|
||||
profiler_inst->OpDataProducerEnd();
|
||||
if (profiler_inst->GetSyncEnableFlag()) {
|
||||
CHECK_OP_RET_WITH_ERROR(SyncStream(), "Profiler SyncStream failed.");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LaunchKernelWithTimeProfiling(kernel, kernel_inputs, kernel_workspaces, kernel_outputs);
|
||||
}
|
||||
LaunchKernelWithoutMock(graph, kernel, kernel_inputs, kernel_workspaces, kernel_outputs, profiling);
|
||||
|
||||
if (gpu_kernel && dynamic_kernel && dynamic_kernel->is_dynamic_shape()) {
|
||||
gpu_kernel->PostExecute();
|
||||
|
@ -748,6 +725,37 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
|
|||
return true;
|
||||
}
|
||||
|
||||
// Launches a single kernel on stream_ for the non-mock execution path.
//
// graph:      kernel graph the kernel belongs to; its graph id feeds the first-step check.
// kernel:     node to launch; its full scoped name is used in profiler records and error messages.
// inputs, workspaces, outputs: address lists handed to the kernel mod's Launch().
// profiling:  when true the launch goes through LaunchKernelWithTimeProfiling(); otherwise the
//             kernel mod is launched directly, bracketed by GPU profiler op records while the
//             profiler is enabled.
//
// Raises (via MS_LOG(EXCEPTION) / MS_EXCEPTION_IF_NULL) when a required pointer is null or the
// kernel mod reports a failed launch.
void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph, const AnfNodePtr &kernel,
                                               const AddressPtrList &inputs, const AddressPtrList &workspaces,
                                               const AddressPtrList &outputs, bool profiling) {
  // Both pointers are dereferenced below; fail with a clear message instead of crashing.
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(kernel);
  auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
  MS_EXCEPTION_IF_NULL(profiler_inst);

  // On the first step of a graph, hand the step-trace op names read from the environment to the profiler.
  if (profiler_inst->GetEnableFlag() && profiler::gpu::ProfilingUtils::IsFirstStep(graph->graph_id())) {
    profiler::gpu::ProfilingTraceInfo profiling_trace =
      profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph));
    profiler_inst->SetStepTraceOpName(profiling_trace);
  }

  if (profiling) {
    LaunchKernelWithTimeProfiling(kernel, inputs, workspaces, outputs);
    return;
  }

  if (profiler_inst->GetEnableFlag()) {
    profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), stream_);
  }
  auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
  MS_EXCEPTION_IF_NULL(kernel_mod);
  if (!kernel_mod->Launch(inputs, workspaces, outputs, stream_)) {
    MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope();
  }
  if (profiler_inst->GetEnableFlag()) {
    profiler_inst->OpDataProducerEnd();
    if (profiler_inst->GetSyncEnableFlag()) {
      CHECK_OP_RET_WITH_ERROR(SyncStream(), "Profiler SyncStream failed.");
    }
  }
}
|
||||
|
||||
bool GPUKernelRuntime::RunOpLaunchKernelDynamic(const session::KernelGraph *graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
const auto &kernels = graph->execution_order();
|
||||
|
|
|
@ -111,6 +111,10 @@ class GPUKernelRuntime : public KernelRuntime {
|
|||
DeviceAddressPtr GetMutableOutputAddr(const AnfNodePtr &node, size_t i, bool visit_nop_node);
|
||||
session::KernelWithIndex GetPrevNodeOutput(const AnfNodePtr &node, size_t i);
|
||||
|
||||
void LaunchKernelWithoutMock(const session::KernelGraph *graph, const AnfNodePtr &kernel,
|
||||
const AddressPtrList &inputs, const AddressPtrList &workspaces,
|
||||
const AddressPtrList &outputs, bool profiling);
|
||||
|
||||
std::unordered_map<uint32_t, MemReuseUtilPtr> mem_reuse_util_map_;
|
||||
std::unordered_map<uint32_t, MemSwapManagerPtr> mem_swap_map_;
|
||||
std::unordered_map<uint32_t, bool> is_first_step_map_;
|
||||
|
|
|
@ -53,15 +53,12 @@ namespace mindspore {
|
|||
namespace profiler {
|
||||
namespace ascend {
|
||||
CallbackManager::CallbackManager(rtStream_t stream) : stream_(stream) {}
|
||||
Status CallbackManager::Init() { return kSuccess; }
|
||||
Status CallbackManager::Destroy() { return kSuccess; }
|
||||
Status CallbackManager::RegisterCallback(rtCallback_t callback, const void *user_data) { return kSuccess; }
|
||||
Status CallbackManager::RegisterCallback(const std::function<void()> &callback) { return kSuccess; }
|
||||
|
||||
AscendProfiler::AscendProfiler() : counter_(0) { Reset(); }
|
||||
|
||||
void AscendProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {}
|
||||
|
||||
void AscendProfiler::Dump(std::ostream &output_stream) {}
|
||||
|
||||
void AscendProfiler::Reset() {}
|
||||
|
|
Loading…
Reference in New Issue