diff --git a/mindspore/ccsrc/profiler/device/ascend/ascend_profiling.cc b/mindspore/ccsrc/profiler/device/ascend/ascend_profiling.cc
index 400aefb7a55..ae6e6a027df 100644
--- a/mindspore/ccsrc/profiler/device/ascend/ascend_profiling.cc
+++ b/mindspore/ccsrc/profiler/device/ascend/ascend_profiling.cc
@@ -16,6 +16,7 @@
 #include "profiler/device/ascend/ascend_profiling.h"
 #include
 #include
+#include "common/util/error_manager/error_manager.h"
 #include "pybind_api/api_register.h"
 #include "utils/log_adapter.h"
 #include "utils/utils.h"
@@ -30,6 +31,8 @@ using mindspore::profiler::ascend::MemoryProfiling;
 namespace mindspore {
 namespace profiler {
 namespace ascend {
+constexpr auto kUnknownErrorString = "Unknown error occurred";
+
 std::map kAicMetrics{
   {"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
   {"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
@@ -42,6 +45,15 @@ std::shared_ptr AscendProfiler::ascend_profiler_ = std::make_sha
 
 std::shared_ptr &AscendProfiler::GetInstance() { return ascend_profiler_; }
 
+// Log the message returned by ErrorManager at ERROR level; nothing is logged when the
+// message is empty or contains kUnknownErrorString.
+void AscendProfiler::ReportErrorMessage() const {
+  const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
+  if (!error_message.empty() && error_message.find(kUnknownErrorString) == std::string::npos) {
+    MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message;
+  }
+}
+
 void AscendProfiler::StepProfilingEnable(const bool enable_flag) {
   MS_LOG(INFO) << "Start profiling";
   enable_flag_ = enable_flag;
@@ -59,6 +71,7 @@ void AscendProfiler::InitProfiling(const std::string &profiling_path, uint32_t d
 
   aclError aclRet = aclprofInit(profile_data_path_.c_str(), profile_data_path_.length());
   if (aclRet != ACL_SUCCESS) {
+    ReportErrorMessage();
     MS_LOG(EXCEPTION) << "Failed to call aclprofInit function.";
   }
 
@@ -114,10 +127,12 @@ void AscendProfiler::Start() {
   aclprofAicoreMetrics aic_metrics = GetAicMetrics();
   acl_config_ = aclprofCreateConfig(device_list, device_num, aic_metrics, nullptr, GetOptionsMask());
   if (acl_config_ == nullptr) {
+    ReportErrorMessage();
     MS_LOG(EXCEPTION) << "Failed to call aclprofCreateConfig function.";
   }
   aclError aclRet = aclprofStart(acl_config_);
   if (aclRet != ACL_SUCCESS) {
+    ReportErrorMessage();
     MS_LOG(EXCEPTION) << "Failed to call aclprofStart function.";
   }
   MS_LOG(INFO) << "Start profiling, options mask is " << mask << " aic_metrics is " << aic_metrics;
@@ -133,16 +148,18 @@ void AscendProfiler::Stop() {
   MS_LOG(INFO) << "Begin to stop profiling.";
   if (acl_config_ == nullptr) {
     MS_LOG(EXCEPTION)
       << "Failed to stop profiling because of null acl config.Please make sure call Profiler.Start function "
          "before call Profiler.Stop function.";
   }
 
   aclError aclRet = aclprofStop(acl_config_);
   if (aclRet != ACL_SUCCESS) {
+    ReportErrorMessage();
     MS_LOG(EXCEPTION) << "Failed to call aclprofStop function.";
   }
   aclRet = aclprofDestroyConfig(acl_config_);
   if (aclRet != ACL_SUCCESS) {
+    ReportErrorMessage();
     MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function.";
   }
 
@@ -155,6 +172,7 @@ void AscendProfiler::Finalize() const {
   MS_LOG(INFO) << "Begin to finalize profiling";
   aclError aclRet = aclprofFinalize();
   if (aclRet != ACL_SUCCESS) {
-    MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function.";
+    ReportErrorMessage();
+    MS_LOG(EXCEPTION) << "Failed to call aclprofFinalize function.";
   }
 }
diff --git a/mindspore/ccsrc/profiler/device/ascend/ascend_profiling.h b/mindspore/ccsrc/profiler/device/ascend/ascend_profiling.h
index 6f3205cedbc..bcff1f74bc5 100644
--- a/mindspore/ccsrc/profiler/device/ascend/ascend_profiling.h
+++ b/mindspore/ccsrc/profiler/device/ascend/ascend_profiling.h
@@ -44,6 +44,7 @@ class AscendProfiler : public Profiler {
   aclprofAicoreMetrics GetAicMetrics() const;
   void Finalize() const;
   bool IsInitialized() { return init_flag_; }
+  void ReportErrorMessage() const;
 
  private:
   static std::shared_ptr ascend_profiler_;
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
index aef2dde81bd..9201aa1b501 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
@@ -17,6 +17,7 @@
 #include "runtime/device/ascend/profiling/profiling_manager.h"
 #include
 #include
+#include "common/util/error_manager/error_manager.h"
 #include "securec/include/securec.h"
 #include "./prof_mgr_core.h"
 #include "utils/log_adapter.h"
@@ -37,6 +38,8 @@ constexpr Status PROF_FAILED = 0xFFFFFFFF;
 namespace mindspore {
 namespace device {
 namespace ascend {
+constexpr auto kUnknownErrorString = "Unknown error occurred";
+
 ProfilingManager &ProfilingManager::GetInstance() {
   static ProfilingManager inst{};
   return inst;
@@ -150,6 +153,15 @@ rtError_t CtrlCallbackHandle(uint32_t rt_type, void *data, uint32_t /* len */) {
   return RT_ERROR_NONE;
 }
 
+// Log the message returned by ErrorManager at ERROR level; nothing is logged when the
+// message is empty or contains kUnknownErrorString.
+void ProfilingManager::ReportErrorMessage() const {
+  const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
+  if (!error_message.empty() && error_message.find(kUnknownErrorString) == std::string::npos) {
+    MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message;
+  }
+}
+
 Status ProfilingManager::CallMsprofReport(const NotNull reporter_data) const {
   if (prof_cb_.msprofReporterCallback == nullptr) {
     MS_LOG(ERROR) << "MsprofReporterCallback callback is nullptr.";
@@ -161,7 +173,8 @@ Status ProfilingManager::CallMsprofReport(const NotNull reporter
     static_cast(reporter_data.get()), sizeof(ReporterData));
 
   if (ret != UintToInt(PROF_SUCCESS)) {
+    ReportErrorMessage();
     MS_LOG(ERROR) << "Call MsprofReporterCallback failed. ret: " << ret;
     return PROF_FAILED;
   }
   return PROF_SUCCESS;
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
index e719c74acce..caa2e683bea 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
@@ -74,6 +74,7 @@ class ProfilingManager {
   Status ProfHandleStart();
   Status ProfHandleStop();
   Status ProfHandleFinalize();
+  void ReportErrorMessage() const;
 
  protected:
   ProfilingManager();