!27773 Report the CANN error message if a CANN API returns an error

Merge pull request !27773 from yuximiao/report_cann_errmsg
This commit is contained in:
i-robot 2021-12-17 02:42:34 +00:00 committed by Gitee
commit 566a314cb5
4 changed files with 30 additions and 2 deletions

View File

@ -16,6 +16,7 @@
#include "profiler/device/ascend/ascend_profiling.h"
#include <map>
#include <string>
#include "common/util/error_manager/error_manager.h"
#include "pybind_api/api_register.h"
#include "utils/log_adapter.h"
#include "utils/utils.h"
@ -30,6 +31,8 @@ using mindspore::profiler::ascend::MemoryProfiling;
namespace mindspore {
namespace profiler {
namespace ascend {
// Marker substring: AscendProfiler::ReportErrorMessage() does not log an error message that contains it.
constexpr auto kUnknownErrorString = "Unknown error occurred";
std::map<std::string, aclprofAicoreMetrics> kAicMetrics{
{"ArithmeticUtilization", ACL_AICORE_ARITHMETIC_UTILIZATION},
{"PipeUtilization", ACL_AICORE_PIPE_UTILIZATION},
@ -42,6 +45,13 @@ std::shared_ptr<AscendProfiler> AscendProfiler::ascend_profiler_ = std::make_sha
// Returns a reference to the static shared_ptr member that holds the AscendProfiler instance.
std::shared_ptr<AscendProfiler> &AscendProfiler::GetInstance() {
  return ascend_profiler_;
}
void AscendProfiler::ReportErrorMessage() const {
const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
if (!error_message.empty() && error_message.find(kUnknownErrorString) == std::string::npos) {
MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message;
}
}
void AscendProfiler::StepProfilingEnable(const bool enable_flag) {
MS_LOG(INFO) << "Start profiling";
enable_flag_ = enable_flag;
@ -59,6 +69,7 @@ void AscendProfiler::InitProfiling(const std::string &profiling_path, uint32_t d
aclError aclRet = aclprofInit(profile_data_path_.c_str(), profile_data_path_.length());
if (aclRet != ACL_SUCCESS) {
ReportErrorMessage();
MS_LOG(EXCEPTION) << "Failed to call aclprofInit function.";
}
@ -114,10 +125,12 @@ void AscendProfiler::Start() {
aclprofAicoreMetrics aic_metrics = GetAicMetrics();
acl_config_ = aclprofCreateConfig(device_list, device_num, aic_metrics, nullptr, GetOptionsMask());
if (acl_config_ == nullptr) {
ReportErrorMessage();
MS_LOG(EXCEPTION) << "Failed to call aclprofCreateConfig function.";
}
aclError aclRet = aclprofStart(acl_config_);
if (aclRet != ACL_SUCCESS) {
ReportErrorMessage();
MS_LOG(EXCEPTION) << "Failed to call aclprofStart function.";
}
MS_LOG(INFO) << "Start profiling, options mask is " << mask << " aic_metrics is " << aic_metrics;
@ -133,16 +146,18 @@ void AscendProfiler::Stop() {
MS_LOG(INFO) << "Begin to stop profiling.";
if (acl_config_ == nullptr) {
MS_LOG(EXCEPTION)
<< "Failed to stop profiling because of null acl config.Please make sure call Profiler.Start function "
<< "Failed to stop profiling because of null aReportDatacl config.Please make sure call Profiler.Start function "
"before call Profiler.Stop function.";
}
aclError aclRet = aclprofStop(acl_config_);
if (aclRet != ACL_SUCCESS) {
ReportErrorMessage();
MS_LOG(EXCEPTION) << "Failed to call aclprofStop function.";
}
aclRet = aclprofDestroyConfig(acl_config_);
if (aclRet != ACL_SUCCESS) {
ReportErrorMessage();
MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function.";
}
@ -155,6 +170,7 @@ void AscendProfiler::Finalize() const {
MS_LOG(INFO) << "Begin to finalize profiling";
aclError aclRet = aclprofFinalize();
if (aclRet != ACL_SUCCESS) {
ReportErrorMessage();
MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function.";
}
}

View File

@ -44,6 +44,7 @@ class AscendProfiler : public Profiler {
aclprofAicoreMetrics GetAicMetrics() const;
void Finalize() const;
bool IsInitialized() { return init_flag_; }
// Logs the message obtained from ErrorManager at ERROR level, unless it is empty or
// contains the "Unknown error occurred" marker.
void ReportErrorMessage() const;
private:
static std::shared_ptr<AscendProfiler> ascend_profiler_;

View File

@ -17,6 +17,7 @@
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include <cstdlib>
#include <vector>
#include "common/util/error_manager/error_manager.h"
#include "securec/include/securec.h"
#include "./prof_mgr_core.h"
#include "utils/log_adapter.h"
@ -37,6 +38,8 @@ constexpr Status PROF_FAILED = 0xFFFFFFFF;
namespace mindspore {
namespace device {
namespace ascend {
// Marker substring: ProfilingManager::ReportErrorMessage() does not log an error message that contains it.
constexpr auto kUnknownErrorString = "Unknown error occurred";
ProfilingManager &ProfilingManager::GetInstance() {
static ProfilingManager inst{};
return inst;
@ -150,6 +153,13 @@ rtError_t CtrlCallbackHandle(uint32_t rt_type, void *data, uint32_t /* len */) {
return RT_ERROR_NONE;
}
void ProfilingManager::ReportErrorMessage() const {
const std::string &error_message = ErrorManager::GetInstance().GetErrorMessage();
if (!error_message.empty() && error_message.find(kUnknownErrorString) == std::string::npos) {
MS_LOG(ERROR) << "Ascend error occurred, error message:\n" << error_message;
}
}
Status ProfilingManager::CallMsprofReport(const NotNull<ReporterData *> reporter_data) const {
if (prof_cb_.msprofReporterCallback == nullptr) {
MS_LOG(ERROR) << "MsprofReporterCallback callback is nullptr.";
@ -161,7 +171,7 @@ Status ProfilingManager::CallMsprofReport(const NotNull<ReporterData *> reporter
static_cast<void *>(reporter_data.get()), sizeof(ReporterData));
if (ret != UintToInt(PROF_SUCCESS)) {
MS_LOG(ERROR) << "Call MsprofReporterCallback failed. ret: " << ret;
ReportErrorMessage();
return PROF_FAILED;
}
return PROF_SUCCESS;

View File

@ -74,6 +74,7 @@ class ProfilingManager {
Status ProfHandleStart();
Status ProfHandleStop();
Status ProfHandleFinalize();
// Logs the message obtained from ErrorManager at ERROR level, unless it is empty or
// contains the "Unknown error occurred" marker.
void ReportErrorMessage() const;
protected:
ProfilingManager();