Fix PC-lint and Codex findings in the profiler

This commit is contained in:
yanghaitao1 2021-05-25 09:17:35 -04:00
parent 878cb6ac3b
commit 127e4d4068
21 changed files with 122 additions and 149 deletions

View File

@ -29,26 +29,6 @@ const int kIndent = 8;
// Starts the event counter at zero, then calls Reset().
AscendProfiler::AscendProfiler() : counter_(0) { Reset(); }
// Records one profiling event.
//
// The printf-style arguments are formatted into a fixed buffer of kEventDescMax bytes. On success the
// text, the current system_clock time and `event_type` are written into the events_ slot selected by
// the post-incremented counter_. When formatting fails an error is logged and nothing is recorded
// (counter_ is left unchanged).
//
// NOTE(review): the slot index is not range-checked here - presumably events_ is sized elsewhere
// (e.g. in Reset()); confirm.
void AscendProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {
  char desc_buf[kEventDescMax];

  va_list arg_list;
  va_start(arg_list, fmt);
  const auto format_ret = vsnprintf_s(desc_buf, kEventDescMax, kEventDescMax - 1, fmt, arg_list);
  va_end(arg_list);

  if (format_ret == -1) {
    MS_LOG(ERROR) << "format failed:" << fmt;
    return;
  }

  auto &slot = events_[counter_++];
  slot.timestamp = std::chrono::system_clock::now();
  slot.desc = std::string(desc_buf);
  slot.event_type = event_type;
}
void AscendProfiler::Dump(std::ostream &output_stream) {
MS_LOG(INFO) << "start dump async profiling info";
if (events_.empty()) {
@ -60,7 +40,7 @@ void AscendProfiler::Dump(std::ostream &output_stream) {
std::vector<decltype(start)> prev_timestamps;
prev_timestamps.resize(kMaxEventTypes, start);
for (int i = 0; i < counter_; ++i) {
for (uint32_t i = 0; i < counter_; ++i) {
auto &evt = events_[i];
auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(evt.timestamp - start).count();
auto &prev_ts = prev_timestamps[evt.event_type];

View File

@ -43,15 +43,13 @@ class AscendProfiler {
return instance;
}
void RecordEvent(EventType event_type, const char *fmt, ...);
void Reset();
void Dump(std::ostream &os);
private:
std::vector<Event> events_;
std::atomic_int counter_;
std::atomic_uint32_t counter_;
};
} // namespace ascend
} // namespace profiler

View File

@ -23,17 +23,6 @@ namespace profiler {
namespace ascend {
CallbackManager::CallbackManager(rtStream_t stream) : stream_(stream) {}
// Starts the background task that runs CallbackProcess() and keeps its future in ret_future_.
//
// Returns kSuccess when the future is valid after launching, kFail otherwise.
Status CallbackManager::Init() {
  MS_LOG(INFO) << "CallbackManager init, Start to async process event";
  // Request std::launch::async explicitly: with the default policy the implementation is allowed to
  // defer the task until get()/wait() is called on the future, in which case CallbackProcess() would
  // never run concurrently. Capture `this` explicitly since only the member function is needed.
  ret_future_ = std::async(std::launch::async, [this] { return CallbackProcess(); });
  if (!ret_future_.valid()) {
    MS_LOG(ERROR) << "Failed to init callback manager.";
    return kFail;
  }
  return kSuccess;
}
Status CallbackManager::CallbackProcess() {
std::pair<rtEvent_t, std::pair<rtCallback_t, const void *>> entry;
while (true) {
@ -50,15 +39,15 @@ Status CallbackManager::CallbackProcess() {
auto rt_err = rtEventSynchronize(event);
if (rt_err != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtEventSynchronize failed. ret:" << rt_err;
auto ret = rtEventDestroy(event);
if (ret != RT_ERROR_NONE) {
rt_err = rtEventDestroy(event);
if (rt_err != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtEventDestroy failed";
}
return kFail;
}
auto ret = rtEventDestroy(event);
if (ret != RT_ERROR_NONE) {
rt_err = rtEventDestroy(event);
if (rt_err != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtEventDestroy failed";
}
@ -120,7 +109,7 @@ void CallbackManager::RtCallbackFunc(const void *data) {
}
Status CallbackManager::RegisterCallback(const std::function<void()> &callback) {
auto func = std::unique_ptr<std::function<void()>>(new (std::nothrow) std::function<void()>(callback));
auto func = std::make_unique<std::function<void()>>(callback);
if (func == nullptr) {
MS_LOG(ERROR) << "callback is nullptr";
return kInvalidParam;

View File

@ -40,8 +40,6 @@ class CallbackManager {
~CallbackManager() = default;
Status Init();
Status Destroy();
Status RegisterCallback(rtCallback_t callback, const void *user_data);

View File

@ -24,7 +24,6 @@
namespace mindspore {
namespace profiler {
constexpr char kOutputPath[] = "output";
std::shared_ptr<GraphMemory> MemoryProfiling::AddGraphMemoryNode(uint32_t graph_id) {
@ -33,7 +32,7 @@ std::shared_ptr<GraphMemory> MemoryProfiling::AddGraphMemoryNode(uint32_t graph_
return node;
}
std::shared_ptr<GraphMemory> MemoryProfiling::GetGraphMemoryNode(uint32_t graph_id) {
std::shared_ptr<GraphMemory> MemoryProfiling::GetGraphMemoryNode(uint32_t graph_id) const {
auto node = graph_memory_.find(graph_id);
if (node != graph_memory_.end()) {
return node->second;

View File

@ -26,7 +26,6 @@
namespace mindspore {
namespace profiler {
class NodeMemory {
public:
NodeMemory() : node_name_(""), node_id_(0) {}
@ -107,9 +106,8 @@ class MemoryProfiling {
return instance;
}
MemoryProto &GetMemProto() { return memory_proto_; }
std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id);
std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id);
std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id) const;
void SetDeviceMemSize(uint64_t size) { device_mem_size_ = size; }
void MemoryToPB();
void SaveMemoryProfiling();

View File

@ -20,7 +20,7 @@ package mindspore.profiler;
message MemoryProto {
repeated GraphMemProto graph_mem = 1; // memory usage of multiple graphs
int64 total_mem = 2; // total allocated device memory
uint64 total_mem = 2; // total allocated device memory
}
message GraphMemProto {
@ -34,17 +34,17 @@ message GraphMemProto {
message NodeMemProto {
string node_name = 1; // node name
int64 node_id = 2; // node id with respect to the execution order
repeated int64 input_tensor_id = 3; // input tensor id
repeated int64 output_tensor_id = 4; // output tensor id
repeated int64 workspace_tensor_id = 5; // workspace tensor id
uint64 node_id = 2; // node id with respect to the execution order
repeated uint64 input_tensor_id = 3; // input tensor id
repeated uint64 output_tensor_id = 4; // output tensor id
repeated uint64 workspace_tensor_id = 5; // workspace tensor id
}
message TensorMemProto {
int64 tensor_id = 1; // tensor id
int64 size = 2; // aligned tensor size
uint64 tensor_id = 1; // tensor id
uint64 size = 2; // aligned tensor size
string type = 3; // tensor type, e.g. Common, OutputOnly
int64 life_start = 4; // the exe node id at which tensor memory allocated
int64 life_end = 5; // the exe node id at which tensor memory deallocated
uint64 life_start = 4; // the exe node id at which tensor memory allocated
uint64 life_end = 5; // the exe node id at which tensor memory deallocated
string life_long = 6; // see LifeLongType enum
}

View File

@ -24,7 +24,7 @@
namespace mindspore {
namespace profiler {
namespace cpu {
void CpuDataSaver::WriteFile(std::string out_path_dir) {
void CpuDataSaver::WriteFile(const std::string out_path_dir) {
if (op_detail_infos_.empty() || op_type_infos_.empty()) {
MS_LOG(INFO) << "No cpu operation detail infos to write.";
return;

View File

@ -37,7 +37,7 @@ class CpuDataSaver : public DataSaver {
CpuDataSaver &operator=(const CpuDataSaver &) = delete;
void WriteFile(std::string out_path);
void WriteFile(const std::string out_path);
};
} // namespace cpu
} // namespace profiler

View File

@ -16,9 +16,9 @@
#include "profiler/device/cpu/cpu_profiling.h"
#include <time.h>
#include <cxxabi.h>
#include <cmath>
#include <ctime>
#include "profiler/device/cpu/cpu_data_saver.h"
#include "pybind_api/api_register.h"
#include "utils/log_adapter.h"
@ -27,8 +27,7 @@
namespace mindspore {
namespace profiler {
namespace cpu {
std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ =
std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler());
std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = std::make_shared<CPUProfiler>();
std::shared_ptr<CPUProfiler> &CPUProfiler::GetInstance() { return profiler_inst_; }

View File

@ -33,6 +33,7 @@ const float kNanosecondToMillisecond = 1000000;
class CPUProfiler : public Profiler {
public:
static std::shared_ptr<CPUProfiler> &GetInstance();
CPUProfiler() = default;
~CPUProfiler() = default;
CPUProfiler(const CPUProfiler &) = delete;
CPUProfiler &operator=(const CPUProfiler &) = delete;
@ -44,7 +45,6 @@ class CPUProfiler : public Profiler {
void OpDataProducerEnd() override;
private:
CPUProfiler() = default;
void SetRunTimeData(const std::string &op_name, const uint32_t pid);
void SaveProfileData() override;
void ClearInst() override;

View File

@ -23,7 +23,7 @@
namespace mindspore {
namespace profiler {
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
OpDetailInfo::OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proportion)
: op_info_(op_info), proportion_(proportion) {
// op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
op_full_name_ = op_info->op_name;
@ -72,7 +72,7 @@ void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
}
}
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) const {
float sum = 0;
sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
[](float i, auto iter) { return i + iter.second.op_host_cost_time; });
@ -80,7 +80,7 @@ float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
return sum;
}
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
void DataSaver::WriteOpType(const std::string &saver_base_dir) const {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_type_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
// check if the file is writable
@ -110,7 +110,7 @@ void DataSaver::WriteOpType(const std::string &saver_base_dir) {
MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
}
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) const {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_detail_info_" + device_id_ + ".csv";
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
@ -139,7 +139,7 @@ void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
}
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) const {
std::string file_path = saver_base_dir + "/" + op_side_ + "_op_execute_timestamp_" + device_id_ + ".txt";
std::ofstream ofs(file_path);
// check if the file is writable
@ -167,7 +167,7 @@ void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
ChangeFileMode(file_path);
}
void DataSaver::ChangeFileMode(const std::string &file_path) {
void DataSaver::ChangeFileMode(const std::string &file_path) const {
if (chmod(common::SafeCStr(file_path), S_IRUSR | S_IWUSR) == -1) {
MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
return;

View File

@ -34,7 +34,7 @@ struct OpDetailInfo {
float proportion_{0};
OpDetailInfo() = default;
OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);
OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proportion);
std::string GetCpuHeader() const {
return "op_side,op_type,op_name,full_op_name,op_occurrences,op_total_time(ms),"
@ -45,13 +45,13 @@ struct OpDetailInfo {
"cuda_activity_cost_time(us),cuda_activity_call_count";
}
void OutputCpuOpDetailInfo(std::ostream &os) {
void OutputCpuOpDetailInfo(std::ostream &os) const {
os << "Host," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ','
<< op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ",Default," << op_info_->pid
<< std::endl;
}
void OutputGpuOpDetailInfo(std::ostream &os) {
void OutputGpuOpDetailInfo(std::ostream &os) const {
os << "Device," << op_type_ << ',' << op_name_ << ',' << op_full_name_ << ',' << op_info_->op_count << ','
<< op_info_->op_host_cost_time << ',' << op_avg_time_ << ',' << proportion_ << ','
<< op_info_->cupti_activity_time << ',' << op_info_->op_kernel_count << std::endl;
@ -72,12 +72,12 @@ struct OpType {
}
std::string GetGpuHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }
void OutputCpuOpTypeInfo(std::ostream &os) {
void OutputCpuOpTypeInfo(std::ostream &os) const {
os << op_type_ << ',' << count_ << ',' << count_ / step_ << ',' << total_time_ << ',' << total_time_ / count_ << ','
<< proportion_ << std::endl;
}
void OutputGpuOpTypeInfo(std::ostream &os) {
void OutputGpuOpTypeInfo(std::ostream &os) const {
os << op_type_ << ',' << count_ << ',' << total_time_ << ',' << proportion_ << ',' << avg_time_ << std::endl;
}
@ -105,15 +105,15 @@ class DataSaver {
protected:
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
float GetTotalOpTime(const OpInfoMap &op_info_maps);
float GetTotalOpTime(const OpInfoMap &op_info_maps) const;
void WriteOpType(const std::string &saver_base_dir);
void WriteOpType(const std::string &saver_base_dir) const;
void WriteOpDetail(const std::string &saver_base_dir);
void WriteOpDetail(const std::string &saver_base_dir) const;
void WriteOpTimestamp(const std::string &saver_base_dir);
void WriteOpTimestamp(const std::string &saver_base_dir) const;
void ChangeFileMode(const std::string &file_path);
void ChangeFileMode(const std::string &file_path) const;
OpTypeInfos op_type_infos_;
OpDetailInfos op_detail_infos_;

View File

@ -43,23 +43,23 @@ inline void *GetCUPTIFunc(const char *name) {
return func;
}
typedef CUptiResult (*CuptiSubscribeFunc)(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback,
void *userdata);
typedef CUptiResult (*CuptiEnableDomainFunc)(uint32_t enable, CUpti_SubscriberHandle subscriber,
CUpti_CallbackDomain domain);
typedef CUptiResult (*CuptiActivityEnableFunc)(CUpti_ActivityKind kind);
typedef CUptiResult (*CuptiActivityRegisterCallbacksFunc)(CUpti_BuffersCallbackRequestFunc funcBufferRequested,
CUpti_BuffersCallbackCompleteFunc funcBufferCompleted);
typedef CUptiResult (*CuptiUnsubscribeFunc)(CUpti_SubscriberHandle subscriber);
typedef CUptiResult (*CuptiActivityFlushAllFunc)(uint32_t flag);
typedef CUptiResult (*CuptiActivityDisableFunc)(CUpti_ActivityKind kind);
typedef CUptiResult (*CuptiActivityGetNextRecordFunc)(uint8_t *buffer, size_t validBufferSizeBytes,
CUpti_Activity **record);
typedef CUptiResult (*CuptiActivityGetNumDroppedRecordsFunc)(CUcontext context, uint32_t streamId, size_t *dropped);
typedef CUptiResult (*CuptiGetTimestampFunc)(uint64_t *timestamp);
typedef CUptiResult (*CuptiGetResultStringFunc)(CUptiResult result, const char **str);
typedef CUptiResult (*CuptiGetStreamIdFunc)(CUcontext context, CUstream stream, uint32_t *streamId);
typedef CUptiResult (*CuptiGetDeviceIdFunc)(CUcontext context, uint32_t *deviceId);
using CuptiSubscribeFunc = CUptiResult (*)(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback,
void *userdata);
using CuptiEnableDomainFunc = CUptiResult (*)(uint32_t enable, CUpti_SubscriberHandle subscriber,
CUpti_CallbackDomain domain);
using CuptiActivityEnableFunc = CUptiResult (*)(CUpti_ActivityKind kind);
using CuptiActivityRegisterCallbacksFunc = CUptiResult (*)(CUpti_BuffersCallbackRequestFunc funcBufferRequested,
CUpti_BuffersCallbackCompleteFunc funcBufferCompleted);
using CuptiUnsubscribeFunc = CUptiResult (*)(CUpti_SubscriberHandle subscriber);
using CuptiActivityFlushAllFunc = CUptiResult (*)(uint32_t flag);
using CuptiActivityDisableFunc = CUptiResult (*)(CUpti_ActivityKind kind);
using CuptiActivityGetNextRecordFunc = CUptiResult (*)(uint8_t *buffer, size_t validBufferSizeBytes,
CUpti_Activity **record);
using CuptiActivityGetNumDroppedRecordsFunc = CUptiResult (*)(CUcontext context, uint32_t streamId, size_t *dropped);
using CuptiGetTimestampFunc = CUptiResult (*)(uint64_t *timestamp);
using CuptiGetResultStringFunc = CUptiResult (*)(CUptiResult result, const char **str);
using CuptiGetStreamIdFunc = CUptiResult (*)(CUcontext context, CUstream stream, uint32_t *streamId);
using CuptiGetDeviceIdFunc = CUptiResult (*)(CUcontext context, uint32_t *deviceId);
CUptiResult CuptiSubscribe(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback, void *userdata) {
static auto func_ptr = reinterpret_cast<CuptiSubscribeFunc>(GetCUPTIFunc("cuptiSubscribe"));

View File

@ -16,10 +16,10 @@
#include "profiler/device/gpu/gpu_profiling.h"
#include <time.h>
#include <cxxabi.h>
#include <chrono>
#include <cmath>
#include <ctime>
#include "profiler/device/gpu/cupti_interface.h"
#include "profiler/device/gpu/gpu_data_saver.h"
#include "pybind_api/api_register.h"
@ -29,29 +29,29 @@
namespace mindspore {
namespace profiler {
namespace gpu {
#define BUF_SIZE (32 * 1024)
#define ALIGN_SIZE (8)
#define CHECK_CUPTI_RET_WITH_ERROR(expression, message) \
if (expression != CUPTI_SUCCESS) { \
const char *errstr; \
CuptiGetResultString(expression, &errstr); \
MS_LOG(ERROR) << "CUPTI Error:" << errstr << " function:" << message; \
const size_t BUF_SIZE = 32 * 1024;
const size_t ALIGN_SIZE = 8;
// Evaluates `expression` exactly once; if the result is not CUPTI_SUCCESS, looks up its description
// with CuptiGetResultString and logs it together with `message` at ERROR level.
// The result is cached so a failing CUPTI call is not issued a second time just to fetch the error
// text, and the body is wrapped in do/while(0) so the macro behaves as a single statement (use it
// with a trailing semicolon; safe inside unbraced if/else).
#define CHECK_CUPTI_RET_WITH_ERROR(expression, message) \
  do { \
    const auto cupti_ret_ = (expression); \
    if (cupti_ret_ != CUPTI_SUCCESS) { \
      const char *errstr = nullptr; \
      CuptiGetResultString(cupti_ret_, &errstr); \
      MS_LOG(ERROR) << "CUPTI Error:" << (errstr != nullptr ? errstr : "unknown") << " function:" << (message); \
    } \
  } while (0)
#define CHECK_CUPTI_RET_WITH_EXCEPT(expression, message) \
if (expression != CUPTI_SUCCESS) { \
const char *errstr; \
CuptiGetResultString(expression, &errstr); \
MS_LOG(EXCEPTION) << "CUPTI Error:" << errstr << " function:" << message; \
}
#define CHECK_CUDA_RET_WITH_ERROR(expression, message) \
{ \
cudaError_t status = (expression); \
if (status != cudaSuccess) { \
MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " \
<< cudaGetErrorString(status); \
} \
// Evaluates `expression` exactly once; if the result is not CUPTI_SUCCESS, looks up its description
// with CuptiGetResultString and reports it together with `message` through MS_LOG(EXCEPTION).
// The result is cached so a failing CUPTI call is not issued a second time just to fetch the error
// text, and the body is wrapped in do/while(0) so the macro behaves as a single statement (use it
// with a trailing semicolon; safe inside unbraced if/else).
#define CHECK_CUPTI_RET_WITH_EXCEPT(expression, message) \
  do { \
    const auto cupti_ret_ = (expression); \
    if (cupti_ret_ != CUPTI_SUCCESS) { \
      const char *errstr = nullptr; \
      CuptiGetResultString(cupti_ret_, &errstr); \
      MS_LOG(EXCEPTION) << "CUPTI Error:" << (errstr != nullptr ? errstr : "unknown") << " function:" << (message); \
    } \
  } while (0)
// Evaluates `expression` once into a cudaError_t; when it is not cudaSuccess, logs `message`, the
// numeric status and cudaGetErrorString(status) at ERROR level. The do/while(0) wrapper makes the
// expansion a single statement.
#define CHECK_CUDA_RET_WITH_ERROR(expression, message) \
do { \
cudaError_t status = (expression); \
if (status != cudaSuccess) { \
MS_LOG(ERROR) << "CUDA Error: " << (message) << " | Error Number: " << status << " " \
<< cudaGetErrorString(status); \
} \
} while (0)
#define PROFILER_ERROR_IF_NULLPTR(ptr) \
do { \
if ((ptr) == nullptr) { \
@ -60,8 +60,7 @@ namespace gpu {
} \
} while (0)
std::shared_ptr<GPUProfiler> GPUProfiler::profiler_inst_ =
std::shared_ptr<GPUProfiler>(new (std::nothrow) GPUProfiler());
std::shared_ptr<GPUProfiler> GPUProfiler::profiler_inst_ = std::make_shared<GPUProfiler>();
int32_t GetThreadID() {
uint32_t thread_id = static_cast<uint32_t>(pthread_self());
@ -114,6 +113,8 @@ bool IsMemcpyAsyncEvent(CUpti_CallbackId cb_id) {
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync:
return true;
default:
return false;
}
return false;
}
@ -134,6 +135,8 @@ bool IsMemcpySyncEvent(CUpti_CallbackId cb_id) {
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2:
case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer:
return true;
default:
return false;
}
return false;
}

View File

@ -111,6 +111,7 @@ class ProfilingOp {
class GPUProfiler : public Profiler {
public:
static std::shared_ptr<GPUProfiler> &GetInstance();
GPUProfiler() = default;
~GPUProfiler() { StopCUPTI(); }
GPUProfiler(const GPUProfiler &) = delete;
GPUProfiler &operator=(const GPUProfiler &) = delete;
@ -134,7 +135,6 @@ class GPUProfiler : public Profiler {
std::string ProfileDataPath() const { return profile_data_path_; }
private:
GPUProfiler() = default;
void OpsParser();
void EventLog(const Event &event);
void ClearInst() override;

View File

@ -16,9 +16,9 @@
#include "profiler/device/profiling.h"
#include <time.h>
#include <cxxabi.h>
#include <cmath>
#include <ctime>
#include "profiler/device/cpu/cpu_data_saver.h"
#include "pybind_api/api_register.h"
#include "utils/log_adapter.h"
@ -26,7 +26,7 @@
namespace mindspore {
namespace profiler {
uint64_t Profiler::GetHostMonoTimeStamp() {
uint64_t Profiler::GetHostMonoTimeStamp() const {
struct timespec ts;
#if defined(_WIN32) || defined(_WIN64)
clock_gettime(CLOCK_MONOTONIC, &ts);

View File

@ -61,7 +61,7 @@ class Profiler {
protected:
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);
uint64_t GetHostMonoTimeStamp();
uint64_t GetHostMonoTimeStamp() const;
virtual void SaveProfileData() = 0;
virtual void ClearInst() = 0;
bool enable_flag_ = false;

View File

@ -654,14 +654,6 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
mindspore::RDR::RecordGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, kernels.size());
size_t id = 0;
#endif
auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
MS_EXCEPTION_IF_NULL(profiler_inst);
if (profiler_inst->GetEnableFlag() && profiler::gpu::ProfilingUtils::IsFirstStep(graph->graph_id())) {
profiler::gpu::ProfilingTraceInfo profiling_trace =
profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph));
profiler_inst->SetStepTraceOpName(profiling_trace);
}
CNodePtr last_kernel = GetLastKernel(graph);
for (const auto &kernel : kernels) {
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
@ -700,22 +692,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
mindspore::RDR::UpdateGPUMemAddressInfo(SubModuleId::SM_KERNEL, name, op_name, mem_info, id++);
#endif
if (!mock) {
if (!profiling) {
if (profiler_inst->GetEnableFlag()) {
profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), stream_);
}
if (!kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_)) {
MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope();
}
if (profiler_inst->GetEnableFlag()) {
profiler_inst->OpDataProducerEnd();
if (profiler_inst->GetSyncEnableFlag()) {
CHECK_OP_RET_WITH_ERROR(SyncStream(), "Profiler SyncStream failed.");
}
}
} else {
LaunchKernelWithTimeProfiling(kernel, kernel_inputs, kernel_workspaces, kernel_outputs);
}
LaunchKernelWithoutMock(graph, kernel, kernel_inputs, kernel_workspaces, kernel_outputs, profiling);
if (gpu_kernel && dynamic_kernel && dynamic_kernel->is_dynamic_shape()) {
gpu_kernel->PostExecute();
@ -748,6 +725,37 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
return true;
}
// Launches a single kernel on stream_ for the non-mock execution path.
//
// graph      - graph the kernel belongs to; its graph_id() drives the first-step trace setup.
// kernel     - node whose kernel mod is launched.
// inputs / workspaces / outputs - device address lists handed to the launch.
// profiling  - when true the launch is delegated to LaunchKernelWithTimeProfiling(); otherwise the
//              kernel mod is launched directly, bracketed by GPUProfiler op records when the
//              profiler is enabled.
//
// Raises (via MS_EXCEPTION_IF_NULL / MS_LOG(EXCEPTION)) when graph, kernel, the profiler instance or
// the kernel mod is null, or when the launch reports failure.
//
// NOTE(review): the first-step trace setup below runs on every call; a caller that invokes this once
// per kernel repeats it for each kernel of the graph - confirm that is intended.
void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph, const AnfNodePtr &kernel,
                                               const AddressPtrList &inputs, const AddressPtrList &workspaces,
                                               const AddressPtrList &outputs, bool profiling) {
  // Both pointers are dereferenced below; fail with a clear exception instead of a null dereference.
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(kernel);
  auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
  MS_EXCEPTION_IF_NULL(profiler_inst);

  if (profiler_inst->GetEnableFlag() && profiler::gpu::ProfilingUtils::IsFirstStep(graph->graph_id())) {
    profiler::gpu::ProfilingTraceInfo profiling_trace =
      profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph));
    profiler_inst->SetStepTraceOpName(profiling_trace);
  }

  if (!profiling) {
    if (profiler_inst->GetEnableFlag()) {
      profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), stream_);
    }
    auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
    MS_EXCEPTION_IF_NULL(kernel_mod);
    if (!kernel_mod->Launch(inputs, workspaces, outputs, stream_)) {
      MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope();
    }
    if (profiler_inst->GetEnableFlag()) {
      profiler_inst->OpDataProducerEnd();
      if (profiler_inst->GetSyncEnableFlag()) {
        CHECK_OP_RET_WITH_ERROR(SyncStream(), "Profiler SyncStream failed.");
      }
    }
  } else {
    LaunchKernelWithTimeProfiling(kernel, inputs, workspaces, outputs);
  }
}
bool GPUKernelRuntime::RunOpLaunchKernelDynamic(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(graph);
const auto &kernels = graph->execution_order();

View File

@ -111,6 +111,10 @@ class GPUKernelRuntime : public KernelRuntime {
DeviceAddressPtr GetMutableOutputAddr(const AnfNodePtr &node, size_t i, bool visit_nop_node);
session::KernelWithIndex GetPrevNodeOutput(const AnfNodePtr &node, size_t i);
// Launches one kernel for the non-mock path. When `profiling` is true the launch goes through
// LaunchKernelWithTimeProfiling(); otherwise the kernel mod is launched directly, with GPUProfiler
// op records around it when the profiler is enabled.
void LaunchKernelWithoutMock(const session::KernelGraph *graph, const AnfNodePtr &kernel,
const AddressPtrList &inputs, const AddressPtrList &workspaces,
const AddressPtrList &outputs, bool profiling);
std::unordered_map<uint32_t, MemReuseUtilPtr> mem_reuse_util_map_;
std::unordered_map<uint32_t, MemSwapManagerPtr> mem_swap_map_;
std::unordered_map<uint32_t, bool> is_first_step_map_;

View File

@ -53,15 +53,12 @@ namespace mindspore {
namespace profiler {
namespace ascend {
// Empty CallbackManager implementations: the constructor only stores the stream, and every
// Status-returning method reports kSuccess without doing any work.
CallbackManager::CallbackManager(rtStream_t stream) : stream_(stream) {}
Status CallbackManager::Init() { return kSuccess; }
Status CallbackManager::Destroy() { return kSuccess; }
Status CallbackManager::RegisterCallback(rtCallback_t callback, const void *user_data) { return kSuccess; }
Status CallbackManager::RegisterCallback(const std::function<void()> &callback) { return kSuccess; }
// Empty AscendProfiler implementations: the constructor zeroes counter_ and calls Reset(), and
// RecordEvent, Dump and Reset have empty bodies.
AscendProfiler::AscendProfiler() : counter_(0) { Reset(); }
void AscendProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {}
void AscendProfiler::Dump(std::ostream &output_stream) {}
void AscendProfiler::Reset() {}