forked from mindspore-Ecosystem/mindspore
!23245 Rectify the error trustlist
Merge pull request !23245 from hwjiaorui/error-trustlist
This commit is contained in:
commit
f18b986860
|
@ -127,7 +127,7 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
|
|||
}
|
||||
|
||||
if (kernel_info_list->empty()) {
|
||||
MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for op ["
|
||||
MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for operator ["
|
||||
<< AnfAlgo::GetCNodeName(kernel_node) << ", " << kernel_node->fullname_with_scope()
|
||||
<< "]. Node DebugString:" << kernel_node->DebugString()
|
||||
<< ", maybe the operator can not supported on current platform. \n trace "
|
||||
|
|
|
@ -146,8 +146,8 @@ uint32_t GetProcessNum() {
|
|||
try {
|
||||
process_num = UlongToUint(std::stoul(env_process_num));
|
||||
} catch (std::invalid_argument &e) {
|
||||
MS_LOG(EXCEPTION) << "Invalid MS_BUILD_PROCESS_NUM env:" << env_process_num
|
||||
<< ". Please set the value of MS_BUILD_PROCESS_NUM in [0, 24]";
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'MS_BUILD_PROCESS_NUM', it should be in [1, 24], but got "
|
||||
<< env_process_num;
|
||||
}
|
||||
}
|
||||
return process_num;
|
||||
|
@ -177,7 +177,9 @@ std::string GetParaDebugPath() {
|
|||
if (realpath(save_path.c_str(), real_path)) {
|
||||
save_path = real_path;
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Invalid env PARA_DEBUG_PATH, path : " << save_path;
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'PARA_DEBUG_PATH', the path is " << save_path
|
||||
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
"permission, (3) whether the path is too long.";
|
||||
}
|
||||
} else {
|
||||
save_path = "";
|
||||
|
@ -192,7 +194,9 @@ std::string GetTbePath() {
|
|||
if (realpath(save_path.c_str(), real_path)) {
|
||||
save_path = real_path;
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Invalid env TBE_IMPL_PATH, path : " << save_path;
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'TBE_IMPL_PATH', the path is " << save_path
|
||||
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
"permission, (3) whether the path is too long. ";
|
||||
}
|
||||
} else {
|
||||
if (realpath(kDefPath, real_path)) {
|
||||
|
@ -624,7 +628,7 @@ bool AscendKernelCompileManager::AscendSingleOpCompile(const std::vector<AnfNode
|
|||
} else {
|
||||
ResetOldTask();
|
||||
single_processed_kernels_.clear();
|
||||
MS_LOG(EXCEPTION) << "Kernel compile failed, op [" << op_name << "], build result: " << build_result;
|
||||
MS_LOG(EXCEPTION) << "Kernel compile failed, operator [" << op_name << "], build result: " << build_result;
|
||||
}
|
||||
}
|
||||
QueryFinishJob(job_type);
|
||||
|
|
|
@ -48,7 +48,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
|||
offline_tune[j] = tolower(offline_tune[j]);
|
||||
}
|
||||
if (!(offline_tune == "true" || offline_tune == "false")) {
|
||||
MS_LOG(ERROR) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'";
|
||||
MS_LOG(ERROR) << "Invalid environment variable 'ENABLE_TUNE_DUMP', it should be 'true' or 'false', but got "
|
||||
<< tune_mode;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -80,7 +80,9 @@ std::string TbeUtils::GetBankPath() {
|
|||
save_path = real_path;
|
||||
return save_path;
|
||||
}
|
||||
MS_LOG(EXCEPTION) << "Invalid env TUNE_BANK_PATH, path : " << save_path;
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'TUNE_BANK_PATH', the path is " << save_path
|
||||
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
"permission, (3) whether the path is too long. ";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
@ -94,7 +96,9 @@ std::string TbeUtils::GetTuneDumpPath() {
|
|||
save_path = real_path;
|
||||
return save_path;
|
||||
}
|
||||
MS_LOG(EXCEPTION) << "Invalid env kTUNE_DUMP_PATH, path : " << save_path;
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'TUNE_DUMP_PATH', the path is " << save_path
|
||||
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
"permission, (3) whether the path is too long. ";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
@ -107,7 +111,10 @@ std::string TbeUtils::GetOpDebugPath() {
|
|||
auto old_build = common::GetEnv("MS_OLD_BUILD_PROCESS");
|
||||
std::string config_path;
|
||||
if (!Common::CommonFuncForConfigPath("./", common::GetEnv(kCOMPILER_CACHE_PATH), &config_path)) {
|
||||
MS_LOG(EXCEPTION) << "Invalid env " << kCOMPILER_CACHE_PATH << " : " << common::GetEnv(kCOMPILER_CACHE_PATH);
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'MS_COMPILER_CACHE_PATH', the path is "
|
||||
<< common::GetEnv(kCOMPILER_CACHE_PATH)
|
||||
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
"permission, (3) whether the path is too long. ";
|
||||
}
|
||||
if (!old_build.empty()) {
|
||||
if (config_path[config_path.length() - 1] == '/') {
|
||||
|
@ -171,11 +178,15 @@ nlohmann::json TbeUtils::GenSocInfo() {
|
|||
soc_info_json["deviceId"] = std::to_string(context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID));
|
||||
std::string config_path;
|
||||
if (!Common::CommonFuncForConfigPath("", common::GetEnv("OP_BANK_PATH"), &config_path)) {
|
||||
MS_LOG(EXCEPTION) << "Invalid env OP_BANK_PATH : " << common::GetEnv("OP_BANK_PATH");
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'OP_BANK_PATH', the path is " << common::GetEnv("OP_BANK_PATH")
|
||||
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
"permission, (3) whether the path is too long. ";
|
||||
}
|
||||
soc_info_json["op_bank_path"] = config_path;
|
||||
if (!Common::CommonFuncForConfigPath("", common::GetEnv("MDL_BANK_PATH"), &config_path)) {
|
||||
MS_LOG(EXCEPTION) << "Invalid env MDL_BANK_PATH : " << common::GetEnv("MDL_BANK_PATH");
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'MDL_BANK_PATH', the path is " << common::GetEnv("MDL_BANK_PATH")
|
||||
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
"permission, (3) whether the path is too long. ";
|
||||
}
|
||||
soc_info_json["mdl_bank_path"] = config_path;
|
||||
return soc_info_json;
|
||||
|
@ -438,7 +449,8 @@ bool TbeUtils::CheckOfflineTune() {
|
|||
offline_tune[j] = tolower(offline_tune[j]);
|
||||
}
|
||||
if (!(offline_tune == "true" || offline_tune == "false")) {
|
||||
MS_LOG(EXCEPTION) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'";
|
||||
MS_LOG(ERROR) << "Invalid environment variable 'ENABLE_TUNE_DUMP', it should be 'true' or 'false', but got "
|
||||
<< offline_tune;
|
||||
}
|
||||
offline = (offline_tune == "true");
|
||||
}
|
||||
|
|
|
@ -2315,7 +2315,8 @@ void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector<KernelGraphP
|
|||
is_dynamic = graph->is_dynamic_shape() || is_dynamic;
|
||||
}
|
||||
if (is_dynamic && all_graphs.size() > 1) {
|
||||
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with control flow.";
|
||||
MS_LOG(EXCEPTION)
|
||||
<< "Dynamic shape is not supported with control flow(loop control statements and condition control statements).";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2680,7 +2681,7 @@ void DumpGraphExeOrder(const std::string &file_name, const std::string &target_d
|
|||
std::string file_path = target_dir + "/execution_order/" + file_name;
|
||||
auto realpath = Common::CreatePrefixPath(file_path);
|
||||
if (!realpath.has_value()) {
|
||||
MS_LOG(ERROR) << "Get real path: " << file_path << " filed.";
|
||||
MS_LOG(ERROR) << "Failed to get real path: [" << file_path << "] in dump graph execution order.";
|
||||
return;
|
||||
}
|
||||
file_path = realpath.value();
|
||||
|
@ -2689,7 +2690,9 @@ void DumpGraphExeOrder(const std::string &file_name, const std::string &target_d
|
|||
// write to csv file
|
||||
std::ofstream ofs(file_path);
|
||||
if (!ofs.is_open()) {
|
||||
MS_LOG(ERROR) << "Open file '" << file_path << "' failed!";
|
||||
MS_LOG(ERROR) << "Failed to open file [" << file_path
|
||||
<< "] in dump graph execution order, please check the file access permission and whether disk space "
|
||||
"is available.";
|
||||
return;
|
||||
}
|
||||
ofs << "NodeExecutionOrder-FullNameWithScope\n";
|
||||
|
|
|
@ -98,7 +98,7 @@ std::string GetRankIdStr() {
|
|||
std::string rank_id_str;
|
||||
rank_id_str = std::getenv("RANK_ID");
|
||||
if (rank_id_str.empty()) {
|
||||
MS_LOG(EXCEPTION) << "Get hccl rankid failed, please set env RANK_ID";
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'RANK_ID', it should not be empty.";
|
||||
}
|
||||
return rank_id_str;
|
||||
}
|
||||
|
@ -441,7 +441,7 @@ bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
|
|||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
auto dynamic_kernel = kernel_mod->GenDynamicKernel(cnode, stream_);
|
||||
if (dynamic_kernel == nullptr) {
|
||||
MS_LOG(EXCEPTION) << cnode->fullname_with_scope() << " does not support dynamic shape.";
|
||||
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with the operator [" << AnfAlgo::GetCNodeName(cnode) << "].";
|
||||
}
|
||||
dynamic_kernel->Initialize();
|
||||
dynamic_kernels.emplace_back(dynamic_kernel);
|
||||
|
@ -456,10 +456,10 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
|
|||
SetCurrentContext();
|
||||
if (graph->is_dynamic_shape()) {
|
||||
if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE && (ConfigManager::GetInstance().iter_num() > 1)) {
|
||||
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with sink mode.";
|
||||
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with dataset_sink_mode.";
|
||||
}
|
||||
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
|
||||
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with asyn dump. Please use other debugging methods.";
|
||||
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with Asynchronous Dump. Please use Synchronous Dump.";
|
||||
}
|
||||
MS_LOG(INFO) << "Dynamic Shape Graph Generate Dynamic kernel";
|
||||
return GenDynamicKernel(graph);
|
||||
|
|
|
@ -57,7 +57,7 @@ void AscendMemoryManager::MallocDeviceMemory() {
|
|||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
unsigned int device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << device_mem_size_ << "], ret[" << ret << "]"
|
||||
MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << device_mem_size_ << "], ret[" << ret << "], "
|
||||
<< "Device " << device_id
|
||||
<< " may be other processes occupying this card, check as: ps -ef|grep python";
|
||||
} else {
|
||||
|
@ -93,8 +93,10 @@ uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() {
|
|||
auto total_hbm_size_GB = GetDeviceHBMSize() >> kMemSizeGB;
|
||||
auto backend_max_size_GB = total_hbm_size_GB - 1; // reserved 1 GB for other component
|
||||
if (gb_var > backend_max_size_GB || gb_var == 0) {
|
||||
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << total_hbm_size_GB << " GB, variable_memory_max_size "
|
||||
<< gb_var << " GB is out of range (0-" << backend_max_size_GB << "]GB";
|
||||
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << total_hbm_size_GB
|
||||
<< " GB, variable_memory_max_size should be in range (0-" << backend_max_size_GB
|
||||
<< "]GB, but got " << gb_var
|
||||
<< "GB, please set the context key 'variable_memory_max_size' in valid range.";
|
||||
}
|
||||
return gb_var << kMemSizeGB;
|
||||
}
|
||||
|
|
|
@ -624,7 +624,8 @@ void AscendStreamAssign::AssignAllNodesStream(const NotNull<KernelGraphPtr> &gra
|
|||
<< ", independent stream number: " << independent_stream_num << ".";
|
||||
|
||||
if (total_stream_num > kMaxStreamNum) {
|
||||
MS_LOG(EXCEPTION) << "Total stream number " << total_stream_num << " exceeds the limit of " << kMaxStreamNum << ".";
|
||||
MS_LOG(EXCEPTION) << "Total stream number " << total_stream_num << " exceeds the limit of " << kMaxStreamNum
|
||||
<< ", search details information in mindspore's FAQ.";
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "After stream assign, total stream nums:" << resource_manager.get_cur_stream_num();
|
||||
|
|
|
@ -504,12 +504,12 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
|
|||
if (select_status == kNoMatched) {
|
||||
std::ostringstream buffer;
|
||||
PrintInputAndOutputInferType(buffer, kernel_node);
|
||||
MS_LOG(WARNING) << ">>> Candidates kernel info list:";
|
||||
MS_LOG(WARNING) << ">>> Candidates supported kernel info(input and output data type) list:";
|
||||
for (size_t index = 0; index < kernel_info_list.size(); ++index) {
|
||||
MS_LOG(WARNING) << "Kernel [" << index << "] :" << kernel_info_list[index]->ToString();
|
||||
MS_LOG(WARNING) << "Kernel info [" << index << "] :" << kernel_info_list[index]->ToString();
|
||||
}
|
||||
for (size_t index = 0; index < aicpu_kernel_info_list.size(); ++index) {
|
||||
MS_LOG(WARNING) << "Kernel [" << (kernel_info_list.size() + index)
|
||||
MS_LOG(WARNING) << "Kernel info [" << (kernel_info_list.size() + index)
|
||||
<< "] :" << aicpu_kernel_info_list[index]->ToString();
|
||||
}
|
||||
if (IsPrimitiveCNode(kernel_node, prim::kPrimLabelSwitch)) {
|
||||
|
@ -519,10 +519,11 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
|
|||
SetTensorDeviceInfo(kernel_node);
|
||||
} else {
|
||||
MS_LOG(WARNING) << " <<<";
|
||||
MS_EXCEPTION(TypeError) << "The node [" << kernel_node->DebugString()
|
||||
<< "] cannot find valid kernel info, not supported the type:" << buffer.str()
|
||||
<< ", please refer to the supported dtypes in candidates kernel info list."
|
||||
<< " trace: " << trace::DumpSourceLines(kernel_node);
|
||||
MS_EXCEPTION(TypeError)
|
||||
<< "The operator [" << AnfAlgo::GetCNodeName(kernel_node)
|
||||
<< "] cannot find valid kernel info(input and output data type), not supported the data type: " << buffer.str()
|
||||
<< ", please refer to the supported data types in candidates kernel info list."
|
||||
<< " trace: " << trace::DumpSourceLines(kernel_node) << ", Node DebugString: " << kernel_node->DebugString();
|
||||
}
|
||||
}
|
||||
return select_status;
|
||||
|
|
Loading…
Reference in New Issue