forked from mindspore-Ecosystem/mindspore
!27773 report cann msg if cann api return error
Merge pull request !27773 from yuximiao/report_cann_errmsg
This commit is contained in:
commit
566a314cb5
|
@ -16,6 +16,7 @@
|
|||
#include "profiler/device/ascend/ascend_profiling.h"
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "pybind_api/api_register.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/utils.h"
|
||||
|
@ -30,6 +31,8 @@ using mindspore::profiler::ascend::MemoryProfiling;
|
|||
namespace mindspore {
|
||||
namespace profiler {
|
||||
namespace ascend {
|
||||
// Substring used as a filter by AscendProfiler::ReportErrorMessage(): an ErrorManager message that
// contains this text is not written to the log.
constexpr auto kUnknownErrorString = "Unknown error occurred";
|
||||
|
||||
std::map<std::string, aclprofAicoreMetrics> kAicMetrics{
|
||||
{"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
|
||||
{"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
|
||||
|
@ -42,6 +45,13 @@ std::shared_ptr<AscendProfiler> AscendProfiler::ascend_profiler_ = std::make_sha
|
|||
|
||||
// Returns a reference to the static shared_ptr (ascend_profiler_) that holds the profiler instance.
std::shared_ptr<AscendProfiler> &AscendProfiler::GetInstance() {
  return ascend_profiler_;
}
|
||||
|
||||
void AscendProfiler::ReportErrorMessage() const {
|
||||
const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
|
||||
if (!error_message.empty() && error_message.find(kUnknownErrorString) == std::string::npos) {
|
||||
MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message;
|
||||
}
|
||||
}
|
||||
|
||||
void AscendProfiler::StepProfilingEnable(const bool enable_flag) {
|
||||
MS_LOG(INFO) << "Start profiling";
|
||||
enable_flag_ = enable_flag;
|
||||
|
@ -59,6 +69,7 @@ void AscendProfiler::InitProfiling(const std::string &profiling_path, uint32_t d
|
|||
|
||||
aclError aclRet = aclprofInit(profile_data_path_.c_str(), profile_data_path_.length());
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
ReportErrorMessage();
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofInit function.";
|
||||
}
|
||||
|
||||
|
@ -114,10 +125,12 @@ void AscendProfiler::Start() {
|
|||
aclprofAicoreMetrics aic_metrics = GetAicMetrics();
|
||||
acl_config_ = aclprofCreateConfig(device_list, device_num, aic_metrics, nullptr, GetOptionsMask());
|
||||
if (acl_config_ == nullptr) {
|
||||
ReportErrorMessage();
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofCreateConfig function.";
|
||||
}
|
||||
aclError aclRet = aclprofStart(acl_config_);
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
ReportErrorMessage();
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofStart function.";
|
||||
}
|
||||
MS_LOG(INFO) << "Start profiling, options mask is " << mask << " aic_metrics is " << aic_metrics;
|
||||
|
@ -133,16 +146,18 @@ void AscendProfiler::Stop() {
|
|||
MS_LOG(INFO) << "Begin to stop profiling.";
|
||||
if (acl_config_ == nullptr) {
|
||||
MS_LOG(EXCEPTION)
|
||||
<< "Failed to stop profiling because of null acl config.Please make sure call Profiler.Start function "
|
||||
<< "Failed to stop profiling because of null aReportDatacl config.Please make sure call Profiler.Start function "
|
||||
"before call Profiler.Stop function.";
|
||||
}
|
||||
|
||||
aclError aclRet = aclprofStop(acl_config_);
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
ReportErrorMessage();
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofStop function.";
|
||||
}
|
||||
aclRet = aclprofDestroyConfig(acl_config_);
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
ReportErrorMessage();
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function.";
|
||||
}
|
||||
|
||||
|
@ -155,6 +170,7 @@ void AscendProfiler::Finalize() const {
|
|||
MS_LOG(INFO) << "Begin to finalize profiling";
|
||||
aclError aclRet = aclprofFinalize();
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
ReportErrorMessage();
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function.";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,6 +44,7 @@ class AscendProfiler : public Profiler {
|
|||
aclprofAicoreMetrics GetAicMetrics() const;
|
||||
void Finalize() const;
|
||||
// Returns the current value of init_flag_.
bool IsInitialized() {
  return init_flag_;
}
|
||||
void ReportErrorMessage() const;
|
||||
|
||||
private:
|
||||
static std::shared_ptr<AscendProfiler> ascend_profiler_;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "runtime/device/ascend/profiling/profiling_manager.h"
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "securec/include/securec.h"
|
||||
#include "./prof_mgr_core.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
@ -37,6 +38,8 @@ constexpr Status PROF_FAILED = 0xFFFFFFFF;
|
|||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// Substring used as a filter by ProfilingManager::ReportErrorMessage(): an ErrorManager message that
// contains this text is not written to the log.
constexpr auto kUnknownErrorString = "Unknown error occurred";
|
||||
|
||||
ProfilingManager &ProfilingManager::GetInstance() {
|
||||
static ProfilingManager inst{};
|
||||
return inst;
|
||||
|
@ -150,6 +153,13 @@ rtError_t CtrlCallbackHandle(uint32_t rt_type, void *data, uint32_t /* len */) {
|
|||
return RT_ERROR_NONE;
|
||||
}
|
||||
|
||||
void ProfilingManager::ReportErrorMessage() const {
|
||||
const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
|
||||
if (!error_message.empty() && error_message.find(kUnknownErrorString) == std::string::npos) {
|
||||
MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message;
|
||||
}
|
||||
}
|
||||
|
||||
Status ProfilingManager::CallMsprofReport(const NotNull<ReporterData *> reporter_data) const {
|
||||
if (prof_cb_.msprofReporterCallback == nullptr) {
|
||||
MS_LOG(ERROR) << "MsprofReporterCallback callback is nullptr.";
|
||||
|
@ -161,7 +171,7 @@ Status ProfilingManager::CallMsprofReport(const NotNull<ReporterData *> reporter
|
|||
static_cast<void *>(reporter_data.get()), sizeof(ReporterData));
|
||||
|
||||
if (ret != UintToInt(PROF_SUCCESS)) {
|
||||
MS_LOG(ERROR) << "Call MsprofReporterCallback failed. ret: " << ret;
|
||||
ReportErrorMessage();
|
||||
return PROF_FAILED;
|
||||
}
|
||||
return PROF_SUCCESS;
|
||||
|
|
|
@ -74,6 +74,7 @@ class ProfilingManager {
|
|||
Status ProfHandleStart();
|
||||
Status ProfHandleStop();
|
||||
Status ProfHandleFinalize();
|
||||
void ReportErrorMessage() const;
|
||||
|
||||
protected:
|
||||
ProfilingManager();
|
||||
|
|
Loading…
Reference in New Issue