forked from mindspore-Ecosystem/mindspore
!47377 ErrorManager refactor
Merge pull request !47377 from tanghuikang/error_manager_refacto
This commit is contained in:
commit
617e55ac6d
|
@ -102,7 +102,36 @@ const std::map<uint32_t, std::string> error_msg = {
|
|||
constexpr auto kUnknowErrorString = "Unknown error occurred";
|
||||
} // namespace
|
||||
|
||||
std::string GetErrorMessage(bool add_title) {
|
||||
error_message::Context ErrorManagerAdapter::context_;
|
||||
std::mutex ErrorManagerAdapter::initialized_mutex_;
|
||||
bool ErrorManagerAdapter::initialized_ = false;
|
||||
|
||||
bool ErrorManagerAdapter::Init() {
|
||||
std::unique_lock<std::mutex> lock(initialized_mutex_);
|
||||
if (initialized_) {
|
||||
MS_LOG(DEBUG) << "Ascend error manager has been initialized.";
|
||||
return true;
|
||||
}
|
||||
const auto error_manager_init_ret = ErrorManager::GetInstance().Init();
|
||||
if (error_manager_init_ret != 0) {
|
||||
MS_LOG(WARNING) << "Init ascend error manager failed, some ascend error log may be left out.";
|
||||
return false;
|
||||
}
|
||||
ErrorManager::GetInstance().GenWorkStreamIdDefault();
|
||||
context_ = ErrorManager::GetInstance().GetErrorManagerContext();
|
||||
MS_LOG(DEBUG) << "Initialize ascend error manager successfully. Work stream id: " << context_.work_stream_id;
|
||||
initialized_ = true;
|
||||
LogWriter::SetMessageHandler(&MessageHandler);
|
||||
return true;
|
||||
}
|
||||
|
||||
void ErrorManagerAdapter::BindToCurrentThread() {
|
||||
if (initialized_) {
|
||||
ErrorManager::GetInstance().SetErrorContext(context_);
|
||||
}
|
||||
}
|
||||
|
||||
std::string ErrorManagerAdapter::GetErrorMessage(bool add_title) {
|
||||
const string &error_message = ErrorManager::GetInstance().GetErrorMessage();
|
||||
if (error_message.empty() || error_message.find(kUnknowErrorString) != string::npos) {
|
||||
return "";
|
||||
|
@ -114,14 +143,26 @@ std::string GetErrorMessage(bool add_title) {
|
|||
return error_message;
|
||||
}
|
||||
|
||||
void SetErrorManagerContext() { ErrorManager::GetInstance().GenWorkStreamIdDefault(); }
|
||||
|
||||
std::string GetWarningMessage() {
|
||||
std::string ErrorManagerAdapter::GetWarningMessage(bool add_title) {
|
||||
const string &warning_message = ErrorManager::GetInstance().GetWarningMessage();
|
||||
if (!warning_message.empty()) {
|
||||
return warning_message;
|
||||
if (warning_message.empty()) {
|
||||
return "";
|
||||
}
|
||||
if (add_title) {
|
||||
return "#umsg#Ascend Warning Message:#umsg#" + warning_message;
|
||||
}
|
||||
return warning_message;
|
||||
}
|
||||
|
||||
void ErrorManagerAdapter::MessageHandler(std::ostringstream *oss) {
|
||||
const auto &error_message = GetErrorMessage(true);
|
||||
if (!error_message.empty()) {
|
||||
*oss << error_message;
|
||||
}
|
||||
const auto &warning_message = GetWarningMessage(true);
|
||||
if (!warning_message.empty()) {
|
||||
*oss << warning_message;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
bool IsGraphMode() {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "plugin/device/ascend/hal/hardware/ascend_device_context.h"
|
||||
#include "backend/common/session/kernel_graph.h"
|
||||
|
||||
|
@ -39,9 +40,23 @@ std::string MapToString(const Map &value) {
|
|||
return buffer.str();
|
||||
}
|
||||
|
||||
std::string GetErrorMessage(bool add_title = false);
|
||||
std::string GetWarningMessage();
|
||||
void SetErrorManagerContext();
|
||||
class ErrorManagerAdapter {
|
||||
public:
|
||||
ErrorManagerAdapter() = default;
|
||||
~ErrorManagerAdapter() = default;
|
||||
static bool Init();
|
||||
static std::string GetErrorMessage(bool add_title = false);
|
||||
static std::string GetWarningMessage(bool add_title = false);
|
||||
static void BindToCurrentThread();
|
||||
|
||||
private:
|
||||
static void MessageHandler(std::ostringstream *oss);
|
||||
|
||||
private:
|
||||
static error_message::Context context_;
|
||||
static std::mutex initialized_mutex_;
|
||||
static bool initialized_;
|
||||
};
|
||||
|
||||
bool IsGraphMode();
|
||||
bool IsDynamicShapeGraph(const FuncGraphPtr &func_graph);
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
#include <map>
|
||||
#include <utility>
|
||||
#include "graph/def_types.h"
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "include/backend/data_queue/data_queue_mgr.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "plugin/device/ascend/hal/common/ascend_utils.h"
|
||||
|
@ -181,8 +180,8 @@ DataQueueStatus AscendDataQueueDynamic::Pop() {
|
|||
}
|
||||
|
||||
AscendTdtQueue::AscendTdtQueue(const std::string &channel_name) : DataQueue(channel_name, 0), acl_handle_(nullptr) {
|
||||
// init ErrorManager, 0 means success
|
||||
if (ErrorManager::GetInstance().Init() != 0) {
|
||||
// Init ErrorManager
|
||||
if (!ascend::ErrorManagerAdapter::Init()) {
|
||||
MS_LOG(WARNING) << "[Internal Error] Init ErrorManager failed.";
|
||||
}
|
||||
// get device id
|
||||
|
@ -195,8 +194,7 @@ AscendTdtQueue::AscendTdtQueue(const std::string &channel_name) : DataQueue(chan
|
|||
if (acl_handle_ == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Create channel for sending data failed. The details refer to 'Ascend Error Message'."
|
||||
"#umsg#User Help Message:#umsg#Please check DEVICE ID setting, DEVICE ID that passed"
|
||||
"into dataset(from context) and training process should be the same."
|
||||
<< ascend::GetErrorMessage(true);
|
||||
"into dataset(from context) and training process should be the same.";
|
||||
}
|
||||
tdt_handle::AddHandle(&acl_handle_, nullptr);
|
||||
}
|
||||
|
@ -214,8 +212,7 @@ AscendTdtQueue::AscendTdtQueue(const std::string &channel_name) : DataQueue(chan
|
|||
AscendTdtQueue::~AscendTdtQueue() {
|
||||
if (acl_handle_ != nullptr) {
|
||||
if (acltdtDestroyChannel(acl_handle_) != ACL_SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "Failed to destroy channel for tdt queue. The details refer to 'Ascend Error Message'."
|
||||
<< ascend::GetErrorMessage(true);
|
||||
MS_LOG(EXCEPTION) << "Failed to destroy channel for tdt queue. The details refer to 'Ascend Error Message'.";
|
||||
} else {
|
||||
tdt_handle::DelHandle(&acl_handle_);
|
||||
acl_handle_ = nullptr;
|
||||
|
@ -260,8 +257,7 @@ DataQueueStatus AscendTdtQueue::Push(std::vector<DataQueueItem> data) {
|
|||
<< "transmission channel on the device side. So we force the data transmission channel to stop.";
|
||||
return DataQueueStatus::SUCCESS;
|
||||
}
|
||||
MS_LOG(EXCEPTION) << "Tdt Send data failed. The details refer to 'Ascend Error Message'."
|
||||
<< ascend::GetErrorMessage(true);
|
||||
MS_LOG(EXCEPTION) << "Tdt Send data failed. The details refer to 'Ascend Error Message'.";
|
||||
}
|
||||
auto wingman = DataQueueMgr::GetInstance().GetDataQueue(channel_name_);
|
||||
if (wingman != nullptr && wingman->IsOpen() && !data.empty()) {
|
||||
|
@ -370,8 +366,8 @@ void AscendTdtQueue::DestroyAclDataset(acltdtDataset *acl_dataset, bool include_
|
|||
|
||||
AscendHostQueue::AscendHostQueue(const std::string &channel_name)
|
||||
: DataQueue(channel_name, 0), queue_id_to_trans_id_map_(), queue_id_(0) {
|
||||
// init ErrorManager, 0 means success
|
||||
if (ErrorManager::GetInstance().Init() != 0) {
|
||||
// Init ErrorManager
|
||||
if (ascend::ErrorManagerAdapter::Init()) {
|
||||
MS_LOG(WARNING) << "[Internal Error] Init ErrorManager failed.";
|
||||
}
|
||||
// get device id
|
||||
|
|
|
@ -53,7 +53,6 @@
|
|||
#include "toolchain/adx_datadump_server.h"
|
||||
#include "utils/trace_base.h"
|
||||
#include "graphengine/inc/external/acl/error_codes/rt_error_codes.h"
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "include/common/debug/anf_ir_dump.h"
|
||||
#include "include/common/utils/parallel_context.h"
|
||||
#include "include/common/utils/comm_manager.h"
|
||||
|
@ -155,6 +154,7 @@ AscendKernelRuntime::~AscendKernelRuntime() {
|
|||
}
|
||||
|
||||
void AscendKernelRuntime::SetContext() {
|
||||
ErrorManagerAdapter::BindToCurrentThread();
|
||||
if (rt_context_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
@ -338,7 +338,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
|
|||
|
||||
#ifndef ENABLE_SECURITY
|
||||
void AscendKernelRuntime::PreInit() {
|
||||
const auto error_manager_ret = ErrorManager::GetInstance().Init();
|
||||
const auto error_manager_ret = ErrorManagerAdapter::Init();
|
||||
if (error_manager_ret != 0) {
|
||||
MS_LOG(WARNING) << "Init ErrorManager failed.";
|
||||
}
|
||||
|
@ -366,7 +366,7 @@ bool AscendKernelRuntime::Init() {
|
|||
if (!mindspore::kernel::OpInfoUtils::GenerateOpInfos(soc_version)) {
|
||||
MS_LOG(EXCEPTION) << "Load op info form json config failed, version: " << soc_version;
|
||||
}
|
||||
const auto error_manager_ret = ErrorManager::GetInstance().Init();
|
||||
const auto error_manager_ret = ErrorManagerAdapter::Init();
|
||||
if (error_manager_ret != 0) {
|
||||
MS_LOG(WARNING) << "Init ErrorManager failed.";
|
||||
}
|
||||
|
@ -404,7 +404,7 @@ bool AscendKernelRuntime::Init() {
|
|||
ResetDevice(device_id_);
|
||||
}
|
||||
MS_LOG(EXCEPTION) << "Ascend kernel runtime initialization failed. The details refer to 'Ascend Error Message'."
|
||||
<< GetErrorMessage(true) << "#dmsg#Framework Error Message:#dmsg#" << e.what();
|
||||
<< "#dmsg#Framework Error Message:#dmsg#" << e.what();
|
||||
}
|
||||
|
||||
initialized_ = true;
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
|
||||
#include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
|
||||
#include <cstdlib>
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "securec/include/securec.h"
|
||||
#include "./prof_mgr_core.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
@ -242,7 +241,7 @@ void ProfilingManager::QueryHashId(const int32_t &device_id, const std::string &
|
|||
&hash_data, sizeof(MsprofHashData));
|
||||
if (ret != UintToInt(PROF_SUCCESS)) {
|
||||
MS_LOG(EXCEPTION) << "[Profiling] Query hash id of long string failed, src string is " << src_str.c_str()
|
||||
<< ", ret is " << ret << "." << GetErrorMessage(true);
|
||||
<< ", ret is " << ret << ".";
|
||||
}
|
||||
|
||||
*hash_id = hash_data.hashId;
|
||||
|
|
|
@ -264,8 +264,7 @@ void ProfilingReporter::ReportData(uint32_t device_id, unsigned char *data, size
|
|||
|
||||
auto report_ret = ProfilingManager::GetInstance().CallMsprofReport(NOT_NULL(&report_data));
|
||||
if (report_ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name << ", ret: " << report_ret << "."
|
||||
<< GetErrorMessage(true);
|
||||
MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name << ", ret: " << report_ret << ".";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -109,9 +109,8 @@ bool AscendCollectiveCommLib::Initialize(uint32_t global_rank, uint32_t global_r
|
|||
std::string rank_id_str = std::to_string(global_rank);
|
||||
(void)hccl::HcclAdapter::GetInstance().InitHccl(local_rank_id, rank_id_str);
|
||||
} catch (const std::exception &e) {
|
||||
MS_LOG(EXCEPTION) << "Ascend collective communication initialization failed." << GetErrorMessage(true)
|
||||
<< "#dmsg#Framework Error Message:#dmsg#" << e.what();
|
||||
throw;
|
||||
MS_LOG(EXCEPTION) << "Ascend collective communication initialization failed.#dmsg#Framework Error Message:#dmsg#"
|
||||
<< e.what();
|
||||
}
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
#include "acl/acl_base.h"
|
||||
#include "toolchain/plog.h"
|
||||
#include "framework/common/helper/model_helper.h"
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "plugin/device/ascend/hal/common/ascend_utils.h"
|
||||
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
|
||||
#include "plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.h"
|
||||
|
@ -253,14 +252,12 @@ bool AscendDeprecatedInterface::OpenTsd(const std::shared_ptr<MsContext> &ms_con
|
|||
MS_LOG(WARNING) << "Init slog failed, ret = " << log_ret;
|
||||
}
|
||||
|
||||
if (ErrorManager::GetInstance().Init() != 0) {
|
||||
MS_LOG(WARNING) << "Init ascend error manager failed, some ascend error log may be left out.";
|
||||
}
|
||||
(void)ErrorManagerAdapter::Init();
|
||||
MS_LOG(INFO) << "Device id = " << device_id << ", rank size = " << rank_size << ".";
|
||||
auto ret = rtSetDevice(static_cast<int32_t>(device_id));
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Device " << device_id << " call rtSetDevice failed, ret[" << static_cast<int>(ret)
|
||||
<< "]. The details refer to 'Ascend Error Message'." << GetErrorMessage(true);
|
||||
<< "]. The details refer to 'Ascend Error Message'.";
|
||||
}
|
||||
ms_context_ptr->increase_param<uint32_t>(MS_CTX_TSD_REF);
|
||||
auto thread_crt = [](const std::string &path, const acltdtChannelHandle *acl_handle) {
|
||||
|
@ -281,14 +278,12 @@ bool AscendDeprecatedInterface::CloseTsd(const std::shared_ptr<MsContext> &ms_co
|
|||
ms_context_ptr->set_param<uint32_t>(MS_CTX_TSD_REF, 0);
|
||||
pybind11::gil_scoped_release gil_release;
|
||||
DestroyTensorPrintThread();
|
||||
if (ErrorManager::GetInstance().Init() != 0) {
|
||||
MS_LOG(WARNING) << "Init ascend error manager failed, some ascend error log may be left out.";
|
||||
}
|
||||
(void)ErrorManagerAdapter::Init();
|
||||
uint32_t device_id = ms_context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
auto ret = rtDeviceReset(static_cast<int32_t>(device_id));
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Device " << device_id << " call rtDeviceReset failed, ret[" << static_cast<int>(ret)
|
||||
<< "]. The details refer to 'Ascend Error Message'." << GetErrorMessage(true);
|
||||
<< "]. The details refer to 'Ascend Error Message'.";
|
||||
}
|
||||
ms_context_ptr->set_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT, false);
|
||||
MS_LOG(INFO) << "Call rtDeviceReset, destroy and close tsd successful, ret[" << static_cast<int>(ret) << "]";
|
||||
|
|
|
@ -233,15 +233,14 @@ bool AscendGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<
|
|||
PROF_START(launch_graph);
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance_);
|
||||
runtime_instance_->SetContext();
|
||||
SetErrorManagerContext();
|
||||
device::KernelAdjust::GetInstance().LoadDeviceLoopCtrlParameters(kernel_graph);
|
||||
auto ret = ExecuteGraph(kernel_graph);
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Run task for graph:" << kernel_graph->ToString()
|
||||
<< " error! The details refer to 'Ascend Error Message'." << GetErrorMessage(true);
|
||||
<< " error! The details refer to 'Ascend Error Message'.";
|
||||
}
|
||||
if (auto warning_message = GetWarningMessage(); !warning_message.empty()) {
|
||||
MS_LOG(WARNING) << "Ascend warning message:\n" << warning_message;
|
||||
if (auto warning_message = ErrorManagerAdapter::GetWarningMessage(true); !warning_message.empty()) {
|
||||
MS_LOG(WARNING) << warning_message;
|
||||
}
|
||||
PROF_END(launch_graph);
|
||||
MS_LOG(INFO) << "Status record: end launch graph. graph id: " << kernel_graph->graph_id();
|
||||
|
|
|
@ -194,7 +194,6 @@ void AscendKernelExecutor::PreprocessBeforeRunGraph(const KernelGraphPtr &graph)
|
|||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_LOG(INFO) << "Status record: start preprocess before run graph. graph id: " << graph->graph_id();
|
||||
PROF_START(preprocess_before_run_graph);
|
||||
SetErrorManagerContext();
|
||||
try {
|
||||
if (graph->is_graph_run_mode()) {
|
||||
graph_executor_->PreprocessBeforeRun(graph);
|
||||
|
@ -213,8 +212,7 @@ void AscendKernelExecutor::PreprocessBeforeRunGraph(const KernelGraphPtr &graph)
|
|||
}
|
||||
} catch (const std::exception &e) {
|
||||
MS_LOG(EXCEPTION) << "Preprocess failed before run graph " << graph->graph_id()
|
||||
<< ". The details refer to 'Ascend Error Message'." << GetErrorMessage(true)
|
||||
<< "#dmsg#Framework Error Message:#dmsg#" << e.what();
|
||||
<< ".#dmsg#Framework Error Message:#dmsg#" << e.what();
|
||||
}
|
||||
|
||||
const std::vector<CNodePtr> &kernels = graph->execution_order();
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "common/util/error_manager/error_manager.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "plugin/device/ascend/hal/common/ascend_utils.h"
|
||||
|
@ -29,7 +28,7 @@
|
|||
#include "kernel/kernel.h"
|
||||
#include "acl/acl_rt.h"
|
||||
|
||||
using mindspore::device::ascend::GetErrorMessage;
|
||||
using mindspore::device::ascend::ErrorManagerAdapter;
|
||||
using mindspore::device::ascend::ProfilingManager;
|
||||
using mindspore::device::ascend::ProfilingReporter;
|
||||
using mindspore::profiler::ascend::MemoryProfiling;
|
||||
|
@ -83,9 +82,7 @@ void AscendProfiler::Init(const std::string &profiling_path, uint32_t device_id,
|
|||
}
|
||||
|
||||
// Init ErrorManager instance in order to get error msg reported by Ascend.
|
||||
if (ErrorManager::GetInstance().Init() != 0) {
|
||||
MS_LOG(WARNING) << "[Internal Error] Failed to init ErrorManager class.";
|
||||
}
|
||||
(void)ErrorManagerAdapter::Init();
|
||||
|
||||
(void)ProfilingManager::GetInstance().InitProfiling(profiling_path, device_id);
|
||||
|
||||
|
@ -93,7 +90,7 @@ void AscendProfiler::Init(const std::string &profiling_path, uint32_t device_id,
|
|||
|
||||
aclError aclRet = aclprofInit(profile_data_path_.c_str(), profile_data_path_.length());
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofInit function." << GetErrorMessage(true);
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofInit function.";
|
||||
}
|
||||
|
||||
init_flag_ = true;
|
||||
|
@ -150,11 +147,11 @@ void AscendProfiler::Start() {
|
|||
aclprofAicoreMetrics aic_metrics = GetAicMetrics();
|
||||
acl_config_ = aclprofCreateConfig(device_list, device_num, aic_metrics, nullptr, GetOptionsMask());
|
||||
if (acl_config_ == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofCreateConfig function." << GetErrorMessage(true);
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofCreateConfig function.";
|
||||
}
|
||||
aclError aclRet = aclprofStart(acl_config_);
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofStart function." << GetErrorMessage(true);
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofStart function.";
|
||||
}
|
||||
MS_LOG(INFO) << "Start profiling, options mask is " << mask << " aic_metrics is " << aic_metrics;
|
||||
|
||||
|
@ -175,11 +172,11 @@ void AscendProfiler::Stop() {
|
|||
|
||||
aclError aclRet = aclprofStop(acl_config_);
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofStop function." << GetErrorMessage(true);
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofStop function.";
|
||||
}
|
||||
aclRet = aclprofDestroyConfig(acl_config_);
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function." << GetErrorMessage(true);
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function.";
|
||||
}
|
||||
|
||||
MemoryProfiling::GetInstance().StopMemoryProfiling();
|
||||
|
@ -191,7 +188,7 @@ void AscendProfiler::Finalize() {
|
|||
MS_LOG(INFO) << "Begin to finalize profiling";
|
||||
aclError aclRet = aclprofFinalize();
|
||||
if (aclRet != ACL_SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "Failed to call aclprofDestroyConfig function." << GetErrorMessage(true);
|
||||
// The failing call checked here is aclprofFinalize(), so the message names that function.
MS_LOG(EXCEPTION) << "Failed to call aclprofFinalize function.";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -276,7 +276,7 @@ int KernelManager::BinaryRegister(const mindspore::kernel::FlexArray &kernel_buf
|
|||
auto ret = has_kernel_list ? rtRegisterAllKernel(&dev_bin, module) : rtDevBinaryRegister(&dev_bin, module);
|
||||
if (RT_ERROR_NONE != ret) {
|
||||
MS_LOG(INFO) << "Call runtime rtDevBinaryRegister error, ret: [" << ret
|
||||
<< "], error message: " << device::ascend::GetErrorMessage(true)
|
||||
<< "], error message: " << device::ascend::ErrorManagerAdapter::GetErrorMessage(true)
|
||||
<< ". Try to delete kernel compile cache files, and restart you project again.(These cache files in "
|
||||
"the custom directory if you used the environment variable 'MS_COMPILER_CACHE_PATH', otherwise in "
|
||||
"the current directory).";
|
||||
|
@ -333,7 +333,8 @@ uintptr_t KernelManager::GenFuncStub(const mindspore::kernel::KernelPack &kernel
|
|||
uintptr_t func_stub = ++kernel_stub_gen_;
|
||||
if (RT_ERROR_NONE !=
|
||||
rtFunctionRegister(module, reinterpret_cast<void *>(func_stub), func_name.c_str(), func_name.c_str(), 0)) {
|
||||
MS_LOG(INFO) << "Call runtime rtFunctionRegister error, message:" << device::ascend::GetErrorMessage(true)
|
||||
MS_LOG(INFO) << "Call runtime rtFunctionRegister error, message:"
|
||||
<< device::ascend::ErrorManagerAdapter::GetErrorMessage(true)
|
||||
<< ". Try to delete kernel compile cache files, and restart you project again.(These cache files in "
|
||||
"the custom directory if you used the environment variable 'MS_COMPILER_CACHE_PATH', otherwise in "
|
||||
"the current directory).";
|
||||
|
|
|
@ -172,6 +172,11 @@ LogWriter::ExceptionHandler &LogWriter::exception_handler() {
|
|||
return g_exception_handler;
|
||||
}
|
||||
|
||||
// Returns a mutable reference to the process-wide message handler slot.
// The slot is a function-local static that starts out as nullptr (no handler installed).
LogWriter::MessageHandler &LogWriter::message_handler() {
  static LogWriter::MessageHandler handler_slot{nullptr};
  return handler_slot;
}
|
||||
|
||||
LogWriter::TraceProvider &LogWriter::trace_provider() {
|
||||
static LogWriter::TraceProvider g_trace_provider = nullptr;
|
||||
return g_trace_provider;
|
||||
|
@ -187,6 +192,12 @@ void LogWriter::SetExceptionHandler(const LogWriter::ExceptionHandler &new_excep
|
|||
exception_handler_tmp = new_exception_handler;
|
||||
}
|
||||
|
||||
// Read-only access to the currently installed message handler (nullptr when none is set).
const LogWriter::MessageHandler &LogWriter::GetMessageHandler() {
  const auto &current_handler = message_handler();
  return current_handler;
}
|
||||
|
||||
// Replaces the installed message handler with new_message_handler.
void LogWriter::SetMessageHandler(const LogWriter::MessageHandler &new_message_handler) {
  auto &handler_slot = message_handler();
  handler_slot = new_message_handler;
}
|
||||
|
||||
const LogWriter::TraceProvider &LogWriter::GetTraceProvider() {
|
||||
const auto &trace_provider_tmp = trace_provider();
|
||||
return trace_provider_tmp;
|
||||
|
@ -397,6 +408,12 @@ void LogWriter::operator<(const LogStream &stream) const noexcept {
|
|||
void LogWriter::operator^(const LogStream &stream) const {
|
||||
std::ostringstream msg;
|
||||
msg << stream.sstream_->rdbuf();
|
||||
|
||||
const auto &message_handler = GetMessageHandler();
|
||||
if (message_handler != nullptr) {
|
||||
message_handler(&msg);
|
||||
}
|
||||
|
||||
std::ostringstream oss;
|
||||
std::vector<std::string> dmsg;
|
||||
std::vector<std::string> umsg;
|
||||
|
|
|
@ -249,6 +249,7 @@ class TryCatchGuard {
|
|||
class MS_CORE_API LogWriter {
|
||||
public:
|
||||
using ExceptionHandler = void (*)(ExceptionType, const std::string &);
|
||||
using MessageHandler = void (*)(std::ostringstream *oss);
|
||||
using TraceProvider = std::function<void(std::ostringstream &oss, bool add_title)>;
|
||||
|
||||
LogWriter(const LocationInfo &location, MsLogLevel log_level, SubModuleId submodule,
|
||||
|
@ -278,6 +279,16 @@ class MS_CORE_API LogWriter {
|
|||
/// \param[in] A function pointer of converting exception types in c++.
|
||||
static void SetExceptionHandler(const ExceptionHandler &new_exception_handler);
|
||||
|
||||
/// \brief Get the function pointer of handling message for different device.
|
||||
///
|
||||
/// \return A pointer of the function.
|
||||
static const MessageHandler &GetMessageHandler();
|
||||
|
||||
/// \brief Set the function pointer of handling message for different device.
|
||||
///
|
||||
/// \param[in] A function pointer of handling message for different device.
|
||||
static void SetMessageHandler(const MessageHandler &new_message_handler);
|
||||
|
||||
/// \brief Get the function pointer of printing trace stacks.
|
||||
///
|
||||
/// \return A pointer of the function.
|
||||
|
@ -292,6 +303,7 @@ class MS_CORE_API LogWriter {
|
|||
void OutputLog(const std::ostringstream &msg) const;
|
||||
void RemoveLabelBeforeOutputLog(const std::ostringstream &msg) const;
|
||||
static ExceptionHandler &exception_handler();
|
||||
static MessageHandler &message_handler();
|
||||
static TraceProvider &trace_provider();
|
||||
|
||||
LocationInfo location_;
|
||||
|
|
|
@ -202,6 +202,11 @@ void ErrorManager::ClearWarningMsgContainerByWorkId(const uint64_t work_stream_i
|
|||
|
||||
// No-op implementation: the supplied context is accepted and discarded.
void ErrorManager::SetErrorContext(error_message::Context error_context) {
  (void)error_context;
}
|
||||
|
||||
// Returns a reference to a single function-local static Context; every call yields the same
// object, which is never populated by this implementation.
error_message::Context &ErrorManager::GetErrorManagerContext() {
  static error_message::Context shared_context{};
  return shared_context;
}
|
||||
|
||||
// No-op implementation: both stage names are ignored.
void ErrorManager::SetStage(const std::string &first_stage, const std::string &second_stage) {
  (void)first_stage;
  (void)second_stage;
}
|
||||
|
||||
void ErrorManager::SetStage(const error_message::char_t *first_stage, const size_t first_len,
|
||||
|
|
Loading…
Reference in New Issue