!23245 Rectify the error trustlist

Merge pull request !23245 from hwjiaorui/error-trustlist
This commit is contained in:
i-robot 2021-09-11 01:12:30 +00:00 committed by Gitee
commit f18b986860
9 changed files with 55 additions and 31 deletions

View File

@ -127,7 +127,7 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
}
if (kernel_info_list->empty()) {
MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for op ["
MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for operator ["
<< AnfAlgo::GetCNodeName(kernel_node) << ", " << kernel_node->fullname_with_scope()
<< "]. Node DebugString:" << kernel_node->DebugString()
<< ", maybe the operator can not supported on current platform. \n trace "

View File

@ -146,8 +146,8 @@ uint32_t GetProcessNum() {
try {
process_num = UlongToUint(std::stoul(env_process_num));
} catch (std::invalid_argument &e) {
MS_LOG(EXCEPTION) << "Invalid MS_BUILD_PROCESS_NUM env:" << env_process_num
<< ". Please set the value of MS_BUILD_PROCESS_NUM in [0, 24]";
MS_LOG(EXCEPTION) << "Invalid environment variable 'MS_BUILD_PROCESS_NUM', it should be in [1, 24], but got "
<< env_process_num;
}
}
return process_num;
@ -177,7 +177,9 @@ std::string GetParaDebugPath() {
if (realpath(save_path.c_str(), real_path)) {
save_path = real_path;
} else {
MS_LOG(EXCEPTION) << "Invalid env PARA_DEBUG_PATH, path : " << save_path;
MS_LOG(EXCEPTION) << "Invalid environment variable 'PARA_DEBUG_PATH', the path is " << save_path
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
"permission, (3) whether the path is too long.";
}
} else {
save_path = "";
@ -192,7 +194,9 @@ std::string GetTbePath() {
if (realpath(save_path.c_str(), real_path)) {
save_path = real_path;
} else {
MS_LOG(EXCEPTION) << "Invalid env TBE_IMPL_PATH, path : " << save_path;
MS_LOG(EXCEPTION) << "Invalid environment variable 'TBE_IMPL_PATH', the path is " << save_path
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
"permission, (3) whether the path is too long. ";
}
} else {
if (realpath(kDefPath, real_path)) {
@ -624,7 +628,7 @@ bool AscendKernelCompileManager::AscendSingleOpCompile(const std::vector<AnfNode
} else {
ResetOldTask();
single_processed_kernels_.clear();
MS_LOG(EXCEPTION) << "Kernel compile failed, op [" << op_name << "], build result: " << build_result;
MS_LOG(EXCEPTION) << "Kernel compile failed, operator [" << op_name << "], build result: " << build_result;
}
}
QueryFinishJob(job_type);

View File

@ -48,7 +48,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
offline_tune[j] = tolower(offline_tune[j]);
}
if (!(offline_tune == "true" || offline_tune == "false")) {
MS_LOG(ERROR) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'";
MS_LOG(ERROR) << "Invalid environment variable 'ENABLE_TUNE_DUMP', it should be 'true' or 'false', but got "
<< tune_mode;
return false;
}
}

View File

@ -80,7 +80,9 @@ std::string TbeUtils::GetBankPath() {
save_path = real_path;
return save_path;
}
MS_LOG(EXCEPTION) << "Invalid env TUNE_BANK_PATH, path : " << save_path;
MS_LOG(EXCEPTION) << "Invalid environment variable 'TUNE_BANK_PATH', the path is " << save_path
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
"permission, (3) whether the path is too long. ";
}
return "";
}
@ -94,7 +96,9 @@ std::string TbeUtils::GetTuneDumpPath() {
save_path = real_path;
return save_path;
}
MS_LOG(EXCEPTION) << "Invalid env kTUNE_DUMP_PATH, path : " << save_path;
MS_LOG(EXCEPTION) << "Invalid environment variable 'TUNE_DUMP_PATH', the path is " << save_path
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
"permission, (3) whether the path is too long. ";
}
return "";
}
@ -107,7 +111,10 @@ std::string TbeUtils::GetOpDebugPath() {
auto old_build = common::GetEnv("MS_OLD_BUILD_PROCESS");
std::string config_path;
if (!Common::CommonFuncForConfigPath("./", common::GetEnv(kCOMPILER_CACHE_PATH), &config_path)) {
MS_LOG(EXCEPTION) << "Invalid env " << kCOMPILER_CACHE_PATH << " : " << common::GetEnv(kCOMPILER_CACHE_PATH);
MS_LOG(EXCEPTION) << "Invalid environment variable 'MS_COMPILER_CACHE_PATH', the path is "
<< common::GetEnv(kCOMPILER_CACHE_PATH)
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
"permission, (3) whether the path is too long. ";
}
if (!old_build.empty()) {
if (config_path[config_path.length() - 1] == '/') {
@ -171,11 +178,15 @@ nlohmann::json TbeUtils::GenSocInfo() {
soc_info_json["deviceId"] = std::to_string(context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID));
std::string config_path;
if (!Common::CommonFuncForConfigPath("", common::GetEnv("OP_BANK_PATH"), &config_path)) {
MS_LOG(EXCEPTION) << "Invalid env OP_BANK_PATH : " << common::GetEnv("OP_BANK_PATH");
MS_LOG(EXCEPTION) << "Invalid environment variable 'OP_BANK_PATH', the path is " << common::GetEnv("OP_BANK_PATH")
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
"permission, (3) whether the path is too long. ";
}
soc_info_json["op_bank_path"] = config_path;
if (!Common::CommonFuncForConfigPath("", common::GetEnv("MDL_BANK_PATH"), &config_path)) {
MS_LOG(EXCEPTION) << "Invalid env MDL_BANK_PATH : " << common::GetEnv("MDL_BANK_PATH");
MS_LOG(EXCEPTION) << "Invalid environment variable 'MDL_BANK_PATH', the path is " << common::GetEnv("MDL_BANK_PATH")
<< ". Please check (1) whether the path exists, (2) whether the path has the access "
"permission, (3) whether the path is too long. ";
}
soc_info_json["mdl_bank_path"] = config_path;
return soc_info_json;
@ -438,7 +449,8 @@ bool TbeUtils::CheckOfflineTune() {
offline_tune[j] = tolower(offline_tune[j]);
}
if (!(offline_tune == "true" || offline_tune == "false")) {
MS_LOG(EXCEPTION) << "The value of ENABLE_TUNE_DUMP must be 'true' or 'false'";
MS_LOG(ERROR) << "Invalid environment variable 'ENABLE_TUNE_DUMP', it should be 'true' or 'false', but got "
<< offline_tune;
}
offline = (offline_tune == "true");
}

View File

@ -2315,7 +2315,8 @@ void SessionBasic::UpdateAllGraphDynamicShapeAttr(const std::vector<KernelGraphP
is_dynamic = graph->is_dynamic_shape() || is_dynamic;
}
if (is_dynamic && all_graphs.size() > 1) {
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with control flow.";
MS_LOG(EXCEPTION)
<< "Dynamic shape is not supported with control flow(loop control statements and condition control statements).";
}
}
@ -2680,7 +2681,7 @@ void DumpGraphExeOrder(const std::string &file_name, const std::string &target_d
std::string file_path = target_dir + "/execution_order/" + file_name;
auto realpath = Common::CreatePrefixPath(file_path);
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path: " << file_path << " filed.";
MS_LOG(ERROR) << "Failed to get real path: [" << file_path << "] in dump graph execution order.";
return;
}
file_path = realpath.value();
@ -2689,7 +2690,9 @@ void DumpGraphExeOrder(const std::string &file_name, const std::string &target_d
// write to csv file
std::ofstream ofs(file_path);
if (!ofs.is_open()) {
MS_LOG(ERROR) << "Open file '" << file_path << "' failed!";
MS_LOG(ERROR) << "Failed to open file [" << file_path
<< "] in dump graph execution order, please check the file access permission and whether disk space "
"is available.";
return;
}
ofs << "NodeExecutionOrder-FullNameWithScope\n";

View File

@ -98,7 +98,7 @@ std::string GetRankIdStr() {
std::string rank_id_str;
rank_id_str = std::getenv("RANK_ID");
if (rank_id_str.empty()) {
MS_LOG(EXCEPTION) << "Get hccl rankid failed, please set env RANK_ID";
MS_LOG(EXCEPTION) << "Invalid environment variable 'RANK_ID', it should not be empty.";
}
return rank_id_str;
}
@ -441,7 +441,7 @@ bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
MS_EXCEPTION_IF_NULL(kernel_mod);
auto dynamic_kernel = kernel_mod->GenDynamicKernel(cnode, stream_);
if (dynamic_kernel == nullptr) {
MS_LOG(EXCEPTION) << cnode->fullname_with_scope() << " does not support dynamic shape.";
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with the operator [" << AnfAlgo::GetCNodeName(cnode) << "].";
}
dynamic_kernel->Initialize();
dynamic_kernels.emplace_back(dynamic_kernel);
@ -456,10 +456,10 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
SetCurrentContext();
if (graph->is_dynamic_shape()) {
if (ConfigManager::GetInstance().dataset_mode() == DS_SINK_MODE && (ConfigManager::GetInstance().iter_num() > 1)) {
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with sink mode.";
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with dataset_sink_mode.";
}
if (DumpJsonParser::GetInstance().async_dump_enabled()) {
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with asyn dump. Please use other debugging methods.";
MS_LOG(EXCEPTION) << "Dynamic shape is not supported with Asynchronous Dump. Please use Synchronous Dump.";
}
MS_LOG(INFO) << "Dynamic Shape Graph Generate Dynamic kernel";
return GenDynamicKernel(graph);

View File

@ -57,7 +57,7 @@ void AscendMemoryManager::MallocDeviceMemory() {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
unsigned int device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << device_mem_size_ << "], ret[" << ret << "]"
MS_LOG(EXCEPTION) << "Malloc device memory failed, size[" << device_mem_size_ << "], ret[" << ret << "], "
<< "Device " << device_id
<< " may be other processes occupying this card, check as: ps -ef|grep python";
} else {
@ -93,8 +93,10 @@ uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() {
auto total_hbm_size_GB = GetDeviceHBMSize() >> kMemSizeGB;
auto backend_max_size_GB = total_hbm_size_GB - 1; // reserved 1 GB for other component
if (gb_var > backend_max_size_GB || gb_var == 0) {
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << total_hbm_size_GB << " GB, variable_memory_max_size "
<< gb_var << " GB is out of range (0-" << backend_max_size_GB << "]GB";
MS_LOG(EXCEPTION) << "The Total Device Memory Size is " << total_hbm_size_GB
<< " GB, variable_memory_max_size should be in range (0-" << backend_max_size_GB
<< "]GB, but got " << gb_var
<< "GB, please set the context key 'variable_memory_max_size' in valid range.";
}
return gb_var << kMemSizeGB;
}

View File

@ -624,7 +624,8 @@ void AscendStreamAssign::AssignAllNodesStream(const NotNull<KernelGraphPtr> &gra
<< ", independent stream number: " << independent_stream_num << ".";
if (total_stream_num > kMaxStreamNum) {
MS_LOG(EXCEPTION) << "Total stream number " << total_stream_num << " exceeds the limit of " << kMaxStreamNum << ".";
MS_LOG(EXCEPTION) << "Total stream number " << total_stream_num << " exceeds the limit of " << kMaxStreamNum
<< ", search details information in mindspore's FAQ.";
}
MS_LOG(INFO) << "After stream assign, total stream nums:" << resource_manager.get_cur_stream_num();

View File

@ -504,12 +504,12 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
if (select_status == kNoMatched) {
std::ostringstream buffer;
PrintInputAndOutputInferType(buffer, kernel_node);
MS_LOG(WARNING) << ">>> Candidates kernel info list:";
MS_LOG(WARNING) << ">>> Candidates supported kernel info(input and output data type) list:";
for (size_t index = 0; index < kernel_info_list.size(); ++index) {
MS_LOG(WARNING) << "Kernel [" << index << "] :" << kernel_info_list[index]->ToString();
MS_LOG(WARNING) << "Kernel info [" << index << "] :" << kernel_info_list[index]->ToString();
}
for (size_t index = 0; index < aicpu_kernel_info_list.size(); ++index) {
MS_LOG(WARNING) << "Kernel [" << (kernel_info_list.size() + index)
MS_LOG(WARNING) << "Kernel info [" << (kernel_info_list.size() + index)
<< "] :" << aicpu_kernel_info_list[index]->ToString();
}
if (IsPrimitiveCNode(kernel_node, prim::kPrimLabelSwitch)) {
@ -519,10 +519,11 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
SetTensorDeviceInfo(kernel_node);
} else {
MS_LOG(WARNING) << " <<<";
MS_EXCEPTION(TypeError) << "The node [" << kernel_node->DebugString()
<< "] cannot find valid kernel info, not supported the type:" << buffer.str()
<< ", please refer to the supported dtypes in candidates kernel info list."
<< " trace: " << trace::DumpSourceLines(kernel_node);
MS_EXCEPTION(TypeError)
<< "The operator [" << AnfAlgo::GetCNodeName(kernel_node)
<< "] cannot find valid kernel info(input and output data type), not supported the data type: " << buffer.str()
<< ", please refer to the supported data types in candidates kernel info list."
<< " trace: " << trace::DumpSourceLines(kernel_node) << ", Node DebugString: " << kernel_node->DebugString();
}
}
return select_status;