diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc b/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc index b77b4bb7f30..8228428c96c 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.cc @@ -102,7 +102,36 @@ const std::map error_msg = { constexpr auto kUnknowErrorString = "Unknown error occurred"; } // namespace -std::string GetErrorMessage(bool add_title) { +error_message::Context ErrorManagerAdapter::context_; +std::mutex ErrorManagerAdapter::initialized_mutex_; +bool ErrorManagerAdapter::initialized_ = false; + +bool ErrorManagerAdapter::Init() { + std::unique_lock lock(initialized_mutex_); + if (initialized_) { + MS_LOG(DEBUG) << "Ascend error manager has been initialized."; + return true; + } + const auto error_manager_init_ret = ErrorManager::GetInstance().Init(); + if (error_manager_init_ret != 0) { + MS_LOG(WARNING) << "Init ascend error manager failed, some ascend error log may be left out."; + return false; + } + ErrorManager::GetInstance().GenWorkStreamIdDefault(); + context_ = ErrorManager::GetInstance().GetErrorManagerContext(); + MS_LOG(DEBUG) << "Initialize ascend error manager successfully. 
Work stream id: " << context_.work_stream_id; + initialized_ = true; + LogWriter::SetMessageHandler(&MessageHandler); + return true; +} + +void ErrorManagerAdapter::BindToCurrentThread() { + if (initialized_) { + ErrorManager::GetInstance().SetErrorContext(context_); + } +} + +std::string ErrorManagerAdapter::GetErrorMessage(bool add_title) { const string &error_message = ErrorManager::GetInstance().GetErrorMessage(); if (error_message.empty() || error_message.find(kUnknowErrorString) != string::npos) { return ""; @@ -114,14 +143,26 @@ std::string GetErrorMessage(bool add_title) { return error_message; } -void SetErrorManagerContext() { ErrorManager::GetInstance().GenWorkStreamIdDefault(); } - -std::string GetWarningMessage() { +std::string ErrorManagerAdapter::GetWarningMessage(bool add_title) { const string &warning_message = ErrorManager::GetInstance().GetWarningMessage(); - if (!warning_message.empty()) { - return warning_message; + if (warning_message.empty()) { + return ""; + } + if (add_title) { + return "#umsg#Ascend Warning Message:#umsg#" + warning_message; + } + return warning_message; +} + +void ErrorManagerAdapter::MessageHandler(std::ostringstream *oss) { + const auto &error_message = GetErrorMessage(true); + if (!error_message.empty()) { + *oss << error_message; + } + const auto &warning_message = GetWarningMessage(true); + if (!warning_message.empty()) { + *oss << warning_message; } - return ""; } bool IsGraphMode() { diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.h b/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.h index 0eb5cea3813..b60bc33297f 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/common/ascend_utils.h @@ -19,6 +19,7 @@ #include #include +#include "common/util/error_manager/error_manager.h" #include "plugin/device/ascend/hal/hardware/ascend_device_context.h" #include "backend/common/session/kernel_graph.h" @@ -39,9 +40,23 
@@ std::string MapToString(const Map &value) { return buffer.str(); } -std::string GetErrorMessage(bool add_title = false); -std::string GetWarningMessage(); -void SetErrorManagerContext(); +class ErrorManagerAdapter { + public: + ErrorManagerAdapter() = default; + ~ErrorManagerAdapter() = default; + static bool Init(); + static std::string GetErrorMessage(bool add_title = false); + static std::string GetWarningMessage(bool add_title = false); + static void BindToCurrentThread(); + + private: + static void MessageHandler(std::ostringstream *oss); + + private: + static error_message::Context context_; + static std::mutex initialized_mutex_; + static bool initialized_; +}; bool IsGraphMode(); bool IsDynamicShapeGraph(const FuncGraphPtr &func_graph); diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_data_queue.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_data_queue.cc index 904289fde59..85761246963 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_data_queue.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_data_queue.cc @@ -19,7 +19,6 @@ #include #include #include "graph/def_types.h" -#include "common/util/error_manager/error_manager.h" #include "include/backend/data_queue/data_queue_mgr.h" #include "utils/log_adapter.h" #include "plugin/device/ascend/hal/common/ascend_utils.h" @@ -181,8 +180,8 @@ DataQueueStatus AscendDataQueueDynamic::Pop() { } AscendTdtQueue::AscendTdtQueue(const std::string &channel_name) : DataQueue(channel_name, 0), acl_handle_(nullptr) { - // init ErrorManager, 0 means success - if (ErrorManager::GetInstance().Init() != 0) { + // Init ErrorManager + if (!ascend::ErrorManagerAdapter::Init()) { MS_LOG(WARNING) << "[Internal Error] Init ErrorManager failed."; } // get device id @@ -195,8 +194,7 @@ AscendTdtQueue::AscendTdtQueue(const std::string &channel_name) : DataQueue(chan if (acl_handle_ == nullptr) { MS_LOG(EXCEPTION) << "Create channel for sending data failed. 
The details refer to 'Ascend Error Message'." "#umsg#User Help Message:#umsg#Please check DEVICE ID setting, DEVICE ID that passed" - "into dataset(from context) and training process should be the same." - << ascend::GetErrorMessage(true); + "into dataset(from context) and training process should be the same."; } tdt_handle::AddHandle(&acl_handle_, nullptr); } @@ -214,8 +212,7 @@ AscendTdtQueue::AscendTdtQueue(const std::string &channel_name) : DataQueue(chan AscendTdtQueue::~AscendTdtQueue() { if (acl_handle_ != nullptr) { if (acltdtDestroyChannel(acl_handle_) != ACL_SUCCESS) { - MS_LOG(EXCEPTION) << "Failed to destroy channel for tdt queue. The details refer to 'Ascend Error Message'." - << ascend::GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Failed to destroy channel for tdt queue. The details refer to 'Ascend Error Message'."; } else { tdt_handle::DelHandle(&acl_handle_); acl_handle_ = nullptr; @@ -260,8 +257,7 @@ DataQueueStatus AscendTdtQueue::Push(std::vector data) { << "transmission channel on the device side. So we force the data transmission channel to stop."; return DataQueueStatus::SUCCESS; } - MS_LOG(EXCEPTION) << "Tdt Send data failed. The details refer to 'Ascend Error Message'." - << ascend::GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Tdt Send data failed. 
The details refer to 'Ascend Error Message'."; } auto wingman = DataQueueMgr::GetInstance().GetDataQueue(channel_name_); if (wingman != nullptr && wingman->IsOpen() && !data.empty()) { @@ -370,8 +366,8 @@ void AscendTdtQueue::DestroyAclDataset(acltdtDataset *acl_dataset, bool include_ AscendHostQueue::AscendHostQueue(const std::string &channel_name) : DataQueue(channel_name, 0), queue_id_to_trans_id_map_(), queue_id_(0) { - // init ErrorManager, 0 means success - if (ErrorManager::GetInstance().Init() != 0) { + // Init ErrorManager + if (!ascend::ErrorManagerAdapter::Init()) { MS_LOG(WARNING) << "[Internal Error] Init ErrorManager failed."; } // get device id diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc index 7aa86aec77a..95c2cadb6d6 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc @@ -53,7 +53,6 @@ #include "toolchain/adx_datadump_server.h" #include "utils/trace_base.h" #include "graphengine/inc/external/acl/error_codes/rt_error_codes.h" -#include "common/util/error_manager/error_manager.h" #include "include/common/debug/anf_ir_dump.h" #include "include/common/utils/parallel_context.h" #include "include/common/utils/comm_manager.h" @@ -155,6 +154,7 @@ AscendKernelRuntime::~AscendKernelRuntime() { } void AscendKernelRuntime::SetContext() { + ErrorManagerAdapter::BindToCurrentThread(); if (rt_context_ == nullptr) { return; } @@ -338,7 +338,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() { #ifndef ENABLE_SECURITY void AscendKernelRuntime::PreInit() { - const auto error_manager_ret = ErrorManager::GetInstance().Init(); - if (error_manager_ret != 0) { + const auto error_manager_ret = ErrorManagerAdapter::Init(); + if (!error_manager_ret) { MS_LOG(WARNING) << "Init ErrorManager failed."; } @@ -366,7 +366,7 @@ bool AscendKernelRuntime::Init() { if 
(!mindspore::kernel::OpInfoUtils::GenerateOpInfos(soc_version)) { MS_LOG(EXCEPTION) << "Load op info form json config failed, version: " << soc_version; } - const auto error_manager_ret = ErrorManager::GetInstance().Init(); - if (error_manager_ret != 0) { + const auto error_manager_ret = ErrorManagerAdapter::Init(); + if (!error_manager_ret) { MS_LOG(WARNING) << "Init ErrorManager failed."; } @@ -404,7 +404,7 @@ bool AscendKernelRuntime::Init() { ResetDevice(device_id_); } MS_LOG(EXCEPTION) << "Ascend kernel runtime initialization failed. The details refer to 'Ascend Error Message'." - << GetErrorMessage(true) << "#dmsg#Framework Error Message:#dmsg#" << e.what(); + << "#dmsg#Framework Error Message:#dmsg#" << e.what(); } initialized_ = true; diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_manager.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_manager.cc index 28d5f8721a7..a923e12ef72 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_manager.cc @@ -16,7 +16,6 @@ #include "plugin/device/ascend/hal/device/profiling/profiling_manager.h" #include -#include "common/util/error_manager/error_manager.h" #include "securec/include/securec.h" #include "./prof_mgr_core.h" #include "utils/log_adapter.h" @@ -242,7 +241,7 @@ void ProfilingManager::QueryHashId(const int32_t &device_id, const std::string & &hash_data, sizeof(MsprofHashData)); if (ret != UintToInt(PROF_SUCCESS)) { MS_LOG(EXCEPTION) << "[Profiling] Query hash id of long string failed, src string is " << src_str.c_str() - << ", ret is " << ret << "." 
<< GetErrorMessage(true); + << ", ret is " << ret << "."; } *hash_id = hash_data.hashId; diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc index aa3eb79f8fe..d1c32558373 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc @@ -264,8 +264,7 @@ void ProfilingReporter::ReportData(uint32_t device_id, unsigned char *data, size auto report_ret = ProfilingManager::GetInstance().CallMsprofReport(NOT_NULL(&report_data)); if (report_ret != 0) { - MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name << ", ret: " << report_ret << "." - << GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name << ", ret: " << report_ret << "."; } } diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm_lib.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm_lib.cc index f9a1105a2f0..1b05579a8ca 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm_lib.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_collective_comm_lib.cc @@ -109,9 +109,8 @@ bool AscendCollectiveCommLib::Initialize(uint32_t global_rank, uint32_t global_r std::string rank_id_str = std::to_string(global_rank); (void)hccl::HcclAdapter::GetInstance().InitHccl(local_rank_id, rank_id_str); } catch (const std::exception &e) { - MS_LOG(EXCEPTION) << "Ascend collective communication initialization failed." 
<< GetErrorMessage(true) - << "#dmsg#Framework Error Message:#dmsg#" << e.what(); - throw; + MS_LOG(EXCEPTION) << "Ascend collective communication initialization failed.#dmsg#Framework Error Message:#dmsg#" + << e.what(); } auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc index 28ff38ac878..51bf918700b 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc @@ -27,7 +27,6 @@ #include "acl/acl_base.h" #include "toolchain/plog.h" #include "framework/common/helper/model_helper.h" -#include "common/util/error_manager/error_manager.h" #include "plugin/device/ascend/hal/common/ascend_utils.h" #include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h" #include "plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.h" @@ -253,14 +252,12 @@ bool AscendDeprecatedInterface::OpenTsd(const std::shared_ptr &ms_con MS_LOG(WARNING) << "Init slog failed, ret = " << log_ret; } - if (ErrorManager::GetInstance().Init() != 0) { - MS_LOG(WARNING) << "Init ascend error manager failed, some ascend error log may be left out."; - } + (void)ErrorManagerAdapter::Init(); MS_LOG(INFO) << "Device id = " << device_id << ", rank size = " << rank_size << "."; auto ret = rtSetDevice(static_cast(device_id)); if (ret != RT_ERROR_NONE) { MS_LOG(EXCEPTION) << "Device " << device_id << " call rtSetDevice failed, ret[" << static_cast(ret) - << "]. The details refer to 'Ascend Error Message'." << GetErrorMessage(true); + << "]. 
The details refer to 'Ascend Error Message'."; } ms_context_ptr->increase_param(MS_CTX_TSD_REF); auto thread_crt = [](const std::string &path, const acltdtChannelHandle *acl_handle) { @@ -281,14 +278,12 @@ bool AscendDeprecatedInterface::CloseTsd(const std::shared_ptr &ms_co ms_context_ptr->set_param(MS_CTX_TSD_REF, 0); pybind11::gil_scoped_release gil_release; DestroyTensorPrintThread(); - if (ErrorManager::GetInstance().Init() != 0) { - MS_LOG(WARNING) << "Init ascend error manager failed, some ascend error log may be left out."; - } + (void)ErrorManagerAdapter::Init(); uint32_t device_id = ms_context_ptr->get_param(MS_CTX_DEVICE_ID); auto ret = rtDeviceReset(static_cast(device_id)); if (ret != RT_ERROR_NONE) { MS_LOG(EXCEPTION) << "Device " << device_id << " call rtDeviceReset failed, ret[" << static_cast(ret) - << "]. The details refer to 'Ascend Error Message'." << GetErrorMessage(true); + << "]. The details refer to 'Ascend Error Message'."; } ms_context_ptr->set_param(MS_CTX_IS_PYNATIVE_GE_INIT, false); MS_LOG(INFO) << "Call rtDeviceReset, destroy and close tsd successful, ret[" << static_cast(ret) << "]"; diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_graph_executor.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_graph_executor.cc index 73125f86ea1..2b158e90155 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_graph_executor.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_graph_executor.cc @@ -233,15 +233,14 @@ bool AscendGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector< PROF_START(launch_graph); MS_EXCEPTION_IF_NULL(runtime_instance_); runtime_instance_->SetContext(); - SetErrorManagerContext(); device::KernelAdjust::GetInstance().LoadDeviceLoopCtrlParameters(kernel_graph); auto ret = ExecuteGraph(kernel_graph); if (!ret) { MS_LOG(EXCEPTION) << "Run task for graph:" << kernel_graph->ToString() - << " error! The details refer to 'Ascend Error Message'." 
<< GetErrorMessage(true); + << " error! The details refer to 'Ascend Error Message'."; } - if (auto warning_message = GetWarningMessage(); !warning_message.empty()) { - MS_LOG(WARNING) << "Ascend warning message:\n" << warning_message; + if (auto warning_message = ErrorManagerAdapter::GetWarningMessage(true); !warning_message.empty()) { + MS_LOG(WARNING) << warning_message; } PROF_END(launch_graph); MS_LOG(INFO) << "Status record: end launch graph. graph id: " << kernel_graph->graph_id(); diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_kernel_executor.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_kernel_executor.cc index 05accffd0c6..77aec29d105 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_kernel_executor.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_kernel_executor.cc @@ -194,7 +194,6 @@ void AscendKernelExecutor::PreprocessBeforeRunGraph(const KernelGraphPtr &graph) MS_EXCEPTION_IF_NULL(graph); MS_LOG(INFO) << "Status record: start preprocess before run graph. graph id: " << graph->graph_id(); PROF_START(preprocess_before_run_graph); - SetErrorManagerContext(); try { if (graph->is_graph_run_mode()) { graph_executor_->PreprocessBeforeRun(graph); @@ -213,8 +212,7 @@ void AscendKernelExecutor::PreprocessBeforeRunGraph(const KernelGraphPtr &graph) } } catch (const std::exception &e) { MS_LOG(EXCEPTION) << "Preprocess failed before run graph " << graph->graph_id() - << ". The details refer to 'Ascend Error Message'." 
<< GetErrorMessage(true) - << "#dmsg#Framework Error Message:#dmsg#" << e.what(); + << ".#dmsg#Framework Error Message:#dmsg#" << e.what(); } const std::vector &kernels = graph->execution_order(); diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc index f63b1a327f8..2308464ac67 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc @@ -17,7 +17,6 @@ #include #include #include -#include "common/util/error_manager/error_manager.h" #include "utils/log_adapter.h" #include "include/common/utils/utils.h" #include "plugin/device/ascend/hal/common/ascend_utils.h" @@ -29,7 +28,7 @@ #include "kernel/kernel.h" #include "acl/acl_rt.h" -using mindspore::device::ascend::GetErrorMessage; +using mindspore::device::ascend::ErrorManagerAdapter; using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingReporter; using mindspore::profiler::ascend::MemoryProfiling; @@ -83,9 +82,7 @@ void AscendProfiler::Init(const std::string &profiling_path, uint32_t device_id, } // Init ErrorManager instance in order to get error msg reported by Ascend. - if (ErrorManager::GetInstance().Init() != 0) { - MS_LOG(WARNING) << "[Internal Error] Failed to init ErrorManager class."; - } + (void)ErrorManagerAdapter::Init(); (void)ProfilingManager::GetInstance().InitProfiling(profiling_path, device_id); @@ -93,7 +90,7 @@ void AscendProfiler::Init(const std::string &profiling_path, uint32_t device_id, aclError aclRet = aclprofInit(profile_data_path_.c_str(), profile_data_path_.length()); if (aclRet != ACL_SUCCESS) { - MS_LOG(EXCEPTION) << "Failed to call aclprofInit function." 
<< GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Failed to call aclprofInit function."; } init_flag_ = true; @@ -150,11 +147,11 @@ void AscendProfiler::Start() { aclprofAicoreMetrics aic_metrics = GetAicMetrics(); acl_config_ = aclprofCreateConfig(device_list, device_num, aic_metrics, nullptr, GetOptionsMask()); if (acl_config_ == nullptr) { - MS_LOG(EXCEPTION) << "Failed to call aclprofCreateConfig function." << GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Failed to call aclprofCreateConfig function."; } aclError aclRet = aclprofStart(acl_config_); if (aclRet != ACL_SUCCESS) { - MS_LOG(EXCEPTION) << "Failed to call aclprofStart function." << GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Failed to call aclprofStart function."; } MS_LOG(INFO) << "Start profiling, options mask is " << mask << " aic_metrics is " << aic_metrics; @@ -175,11 +172,11 @@ void AscendProfiler::Stop() { aclError aclRet = aclprofStop(acl_config_); if (aclRet != ACL_SUCCESS) { - MS_LOG(EXCEPTION) << "Failed to call aclprofStop function." << GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Failed to call aclprofStop function."; } aclRet = aclprofDestroyConfig(acl_config_); if (aclRet != ACL_SUCCESS) { - MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function." << GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function."; } MemoryProfiling::GetInstance().StopMemoryProfiling(); @@ -191,7 +188,7 @@ void AscendProfiler::Finalize() { MS_LOG(INFO) << "Begin to finalize profiling"; aclError aclRet = aclprofFinalize(); if (aclRet != ACL_SUCCESS) { - MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function." 
<< GetErrorMessage(true); + MS_LOG(EXCEPTION) << "Failed to call aclprofFinalize function."; } } diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_utils.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_utils.cc index 26f3b46eb70..a41de8325e8 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_utils.cc +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_utils.cc @@ -276,7 +276,7 @@ int KernelManager::BinaryRegister(const mindspore::kernel::FlexArray &kernel_buf auto ret = has_kernel_list ? rtRegisterAllKernel(&dev_bin, module) : rtDevBinaryRegister(&dev_bin, module); if (RT_ERROR_NONE != ret) { MS_LOG(INFO) << "Call runtime rtDevBinaryRegister error, ret: [" << ret - << "], error message: " << device::ascend::GetErrorMessage(true) + << "], error message: " << device::ascend::ErrorManagerAdapter::GetErrorMessage(true) << ". Try to delete kernel compile cache files, and restart you project again.(These cache files in " "the custom directory if you used the environment variable 'MS_COMPILER_CACHE_PATH', otherwise in " "the current directory)."; @@ -333,7 +333,8 @@ uintptr_t KernelManager::GenFuncStub(const mindspore::kernel::KernelPack &kernel uintptr_t func_stub = ++kernel_stub_gen_; if (RT_ERROR_NONE != rtFunctionRegister(module, reinterpret_cast(func_stub), func_name.c_str(), func_name.c_str(), 0)) { - MS_LOG(INFO) << "Call runtime rtFunctionRegister error, message:" << device::ascend::GetErrorMessage(true) + MS_LOG(INFO) << "Call runtime rtFunctionRegister error, message:" + << device::ascend::ErrorManagerAdapter::GetErrorMessage(true) << ". 
Try to delete kernel compile cache files, and restart you project again.(These cache files in " "the custom directory if you used the environment variable 'MS_COMPILER_CACHE_PATH', otherwise in " "the current directory)."; diff --git a/mindspore/core/utils/log_adapter.cc b/mindspore/core/utils/log_adapter.cc index 75aeab10b90..a0cf8da5851 100644 --- a/mindspore/core/utils/log_adapter.cc +++ b/mindspore/core/utils/log_adapter.cc @@ -172,6 +172,11 @@ LogWriter::ExceptionHandler &LogWriter::exception_handler() { return g_exception_handler; } +LogWriter::MessageHandler &LogWriter::message_handler() { + static LogWriter::MessageHandler g_message_handler = nullptr; + return g_message_handler; +} + LogWriter::TraceProvider &LogWriter::trace_provider() { static LogWriter::TraceProvider g_trace_provider = nullptr; return g_trace_provider; @@ -187,6 +192,12 @@ void LogWriter::SetExceptionHandler(const LogWriter::ExceptionHandler &new_excep exception_handler_tmp = new_exception_handler; } +const LogWriter::MessageHandler &LogWriter::GetMessageHandler() { return message_handler(); } + +void LogWriter::SetMessageHandler(const LogWriter::MessageHandler &new_message_handler) { + message_handler() = new_message_handler; +} + const LogWriter::TraceProvider &LogWriter::GetTraceProvider() { const auto &trace_provider_tmp = trace_provider(); return trace_provider_tmp; @@ -397,6 +408,12 @@ void LogWriter::operator<(const LogStream &stream) const noexcept { void LogWriter::operator^(const LogStream &stream) const { std::ostringstream msg; msg << stream.sstream_->rdbuf(); + + const auto &message_handler = GetMessageHandler(); + if (message_handler != nullptr) { + message_handler(&msg); + } + std::ostringstream oss; std::vector dmsg; std::vector umsg; diff --git a/mindspore/core/utils/log_adapter.h b/mindspore/core/utils/log_adapter.h index b916c3e4537..3214982b5cd 100644 --- a/mindspore/core/utils/log_adapter.h +++ b/mindspore/core/utils/log_adapter.h @@ -249,6 +249,7 @@ class 
TryCatchGuard { class MS_CORE_API LogWriter { public: using ExceptionHandler = void (*)(ExceptionType, const std::string &); + using MessageHandler = void (*)(std::ostringstream *oss); using TraceProvider = std::function; LogWriter(const LocationInfo &location, MsLogLevel log_level, SubModuleId submodule, @@ -278,6 +279,16 @@ class MS_CORE_API LogWriter { /// \param[in] A function pointer of converting exception types in c++. static void SetExceptionHandler(const ExceptionHandler &new_exception_handler); + /// \brief Get the function pointer of handling message for different device. + /// + /// \return A pointer of the function. + static const MessageHandler &GetMessageHandler(); + + /// \brief Set the function pointer of handling message for different device. + /// + /// \param[in] A function pointer of handling message for different device. + static void SetMessageHandler(const MessageHandler &new_message_handler); + /// \brief Get the function pointer of printing trace stacks. /// /// \return A pointer of the function. 
@@ -292,6 +303,7 @@ class MS_CORE_API LogWriter { void OutputLog(const std::ostringstream &msg) const; void RemoveLabelBeforeOutputLog(const std::ostringstream &msg) const; static ExceptionHandler &exception_handler(); + static MessageHandler &message_handler(); static TraceProvider &trace_provider(); LocationInfo location_; diff --git a/tests/ut/cpp/stub/error_manager/error_manager_stub.cc b/tests/ut/cpp/stub/error_manager/error_manager_stub.cc index 0c4ae6ec162..120f7fdc815 100644 --- a/tests/ut/cpp/stub/error_manager/error_manager_stub.cc +++ b/tests/ut/cpp/stub/error_manager/error_manager_stub.cc @@ -202,6 +202,11 @@ void ErrorManager::ClearWarningMsgContainerByWorkId(const uint64_t work_stream_i void ErrorManager::SetErrorContext(error_message::Context error_context) {} +error_message::Context &ErrorManager::GetErrorManagerContext() { + static error_message::Context context; + return context; +} + void ErrorManager::SetStage(const std::string &first_stage, const std::string &second_stage) {} void ErrorManager::SetStage(const error_message::char_t *first_stage, const size_t first_len,