forked from mindspore-Ecosystem/mindspore
!26722 vm err log modify
Merge pull request !26722 from zhaosida/code_clean_master
This commit is contained in:
commit
a78b4fd60f
|
@ -127,7 +127,8 @@ std::pair<bool, size_t> CPUKernelFactory::CPUKernelAttrCheck(const std::string &
|
|||
if (kernel_attrs[0].GetInputSize() == 0 && kernel_attrs[0].GetOutputSize() == 0) {
|
||||
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(kernel_name, kernel::OpImplyType::kCPU);
|
||||
if (op_info_ptr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Not find op[" << kernel_name << "] in cpu";
|
||||
MS_LOG(EXCEPTION) << "Not find op[" << kernel_name << "] in cpu. For more details, "
|
||||
<< "please refer to the list of supported cpu operations at https://www.mindspore.cn.";
|
||||
}
|
||||
kernel_attrs.clear();
|
||||
SetKernelAttrs(op_info_ptr, &kernel_attrs);
|
||||
|
|
|
@ -172,7 +172,8 @@ bool OpLib::RegOpFromLocalInfo() {
|
|||
}
|
||||
char real_path[PATH_MAX] = {0};
|
||||
if (dir.size() >= PATH_MAX) {
|
||||
MS_LOG(ERROR) << "Op info path is invalid: " << dir;
|
||||
MS_LOG(ERROR) << "Invalid environment variable 'MINDSPORE_OP_INFO_PATH', the path length should be smaller than "
|
||||
<< PATH_MAX << ", but got " << dir;
|
||||
return false;
|
||||
}
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
|
@ -182,11 +183,14 @@ bool OpLib::RegOpFromLocalInfo() {
|
|||
}
|
||||
#else
|
||||
if (realpath(common::SafeCStr(dir), real_path) == nullptr) {
|
||||
MS_LOG(ERROR) << "Op info path is invalid: " << dir;
|
||||
MS_LOG(ERROR) << "Invalid environment variable 'MINDSPORE_OP_INFO_PATH', the path is: " << dir
|
||||
<< ". Please check (1) whether the path exists, (2) whether the path has the access permission, "
|
||||
<< "(3) whether the path is too long. ";
|
||||
return false;
|
||||
}
|
||||
if (strlen(real_path) >= PATH_MAX) {
|
||||
MS_LOG(ERROR) << "Op info path is invalid, the absolute path length is greater than PATH_MAX";
|
||||
MS_LOG(ERROR) << "Invalid environment variable 'MINDSPORE_OP_INFO_PATH', the absolute path length should be smaller"
|
||||
<< " than " << PATH_MAX << ", but got " << real_path;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -183,13 +183,13 @@ uint32_t GetProcessNum() {
|
|||
auto env_process_num = common::GetEnv(kMS_BUILD_PROCESS_NUM);
|
||||
if (!env_process_num.empty()) {
|
||||
if (!IsDigit(env_process_num)) {
|
||||
MS_LOG(EXCEPTION) << "Invalid environment of 'MS_BUILD_PROCESS_NUM',it should be a digit, but got: "
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'MS_BUILD_PROCESS_NUM',it should be a digit, but got: "
|
||||
<< env_process_num;
|
||||
}
|
||||
process_num = UlongToUint(std::stoul(env_process_num));
|
||||
if (process_num < 1 || process_num > kDEFAULT_PROCESS_NUM) {
|
||||
MS_LOG(EXCEPTION) << "Invalid environment of 'MS_BUILD_PROCESS_NUM', the value should be in [1, 24], but got: "
|
||||
<< process_num;
|
||||
MS_LOG(EXCEPTION) << "Invalid environment variable 'MS_BUILD_PROCESS_NUM', the value should be in [1, 24], "
|
||||
<< "but got: " << process_num;
|
||||
}
|
||||
}
|
||||
return process_num;
|
||||
|
@ -229,8 +229,9 @@ std::string GetTbePath() {
|
|||
} else if (realpath(kBkPath, real_path)) {
|
||||
save_path = real_path;
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Can not get access to [" << kDefPath << "] or [" << kBkPath
|
||||
<< "]. Check if the path exist, or use 'chmod {level} xxx' to change the path's access.";
|
||||
MS_LOG(WARNING) << "Invalid path is [" << kDefPath << "] or [" << kBkPath << "]. "
|
||||
<< "Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
<< "permission, (3) whether the path is too long. ";
|
||||
}
|
||||
}
|
||||
return save_path;
|
||||
|
|
|
@ -178,7 +178,9 @@ void TbeUtils::SaveJsonInfo(const std::string &json_name, const std::string &inf
|
|||
std::string path = config_path + kCceKernelMeta + json_name + kInfoSuffix;
|
||||
auto realpath = Common::CreatePrefixPath(path);
|
||||
if (!realpath.has_value()) {
|
||||
MS_LOG(WARNING) << "Get real path failed, invalid path: " << realpath.value();
|
||||
MS_LOG(WARNING) << "Invalid path is: " << realpath.value()
|
||||
<< "Please check (1) whether the path exists, (2) whether the path has the access "
|
||||
<< "permission, (3) whether the path is too long. ";
|
||||
return;
|
||||
}
|
||||
ChangeFileMode(realpath.value(), S_IWUSR);
|
||||
|
@ -391,7 +393,7 @@ void TbeUtils::GetCompileInfo(const AnfNodePtr &node, std::string *compile_info,
|
|||
auto config_path = TbeUtils::GetOpDebugPath();
|
||||
std::string path = config_path + kCceKernelMeta + json_name + kJsonSuffix;
|
||||
if (path.size() > PATH_MAX) {
|
||||
MS_LOG(WARNING) << "File path: " << path << "is too long.";
|
||||
MS_LOG(WARNING) << "File path length should be smaller than " << PATH_MAX << ", but got " << path;
|
||||
*get_flag = false;
|
||||
return;
|
||||
}
|
||||
|
@ -417,7 +419,7 @@ void TbeUtils::SaveCompileInfo(const std::string &json_name, const std::string &
|
|||
auto config_path = TbeUtils::GetOpDebugPath();
|
||||
std::string path = config_path + kCceKernelMeta + json_name + kJsonSuffix;
|
||||
if (path.size() > PATH_MAX) {
|
||||
MS_LOG(WARNING) << "File path: " << path << "is too long.";
|
||||
MS_LOG(WARNING) << "File path length should be smaller than " << PATH_MAX << ", but got " << path;
|
||||
*save_flag = false;
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -2450,8 +2450,9 @@ void PreProcessOnSplitIndex(const KernelGraphPtr &graph, vector<uint32_t> *split
|
|||
// obtain graph output tensor num
|
||||
auto grads_count = GetBpropGraphGradsCount(graph);
|
||||
if (split_index_num >= grads_count) {
|
||||
MS_LOG(WARNING) << "Invalid all_reduce_fusion_config:" << *split_index << " total grads count:" << grads_count
|
||||
<< ". All AllReduce operators will be fused into one.";
|
||||
MS_LOG(WARNING) << "The context configuration all_reduce_fusion_config's upper boundary value should be smaller "
|
||||
<< "than total grads count: " << grads_count << ", but got: " << *split_index
|
||||
<< ". Now all AllReduce operations will be fused into one AllReduce operation.";
|
||||
split_index->clear();
|
||||
split_index->push_back(grads_count - 1);
|
||||
} else if (split_index_num < grads_count - 1) {
|
||||
|
|
|
@ -673,7 +673,7 @@ std::string AscendKernelRuntime::GetDumpPath() {
|
|||
MS_EXCEPTION_IF_NULL(inst);
|
||||
if (inst->parallel_mode() != parallel::STAND_ALONE) {
|
||||
if (!CommManager::GetInstance().GetRankID(kHcclWorldGroup, &rank_id)) {
|
||||
MS_LOG(WARNING) << "Get rank id failed.";
|
||||
MS_LOG(WARNING) << "Get rank id failed, now using the default value 0.";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -681,7 +681,8 @@ std::string AscendKernelRuntime::GetDumpPath() {
|
|||
std::string path;
|
||||
const auto kSuffix = "/node_dump";
|
||||
if (ms_om_path.empty()) {
|
||||
MS_LOG(WARNING) << "MS_OM_PATH is null, so dump to process local path, as ./rank_id/node_dump/...";
|
||||
MS_LOG(WARNING) << "The environment variable 'MS_OM_PATH' is not set, the files of node dump will save to the "
|
||||
<< "process local path, as ./rank_id/node_dump/...";
|
||||
path = "./rank_" + std::to_string(rank_id) + kSuffix;
|
||||
} else {
|
||||
path = ms_om_path + "/rank_" + std::to_string(rank_id) + kSuffix;
|
||||
|
@ -1083,18 +1084,22 @@ bool AscendKernelRuntime::HcclInit() {
|
|||
if (config_path_str == nullptr) {
|
||||
config_path_str = std::getenv("RANK_TABLE_FILE");
|
||||
if (config_path_str == nullptr) {
|
||||
MS_LOG(ERROR) << "Get hccl json config failed, please set env MINDSPORE_HCCL_CONFIG_PATH or RANK_TABLE_FILE";
|
||||
MS_LOG(ERROR) << "The environment variable 'MINDSPORE_HCCL_CONFIG_PATH' or 'RANK_TABLE_FILE' is not set, so get"
|
||||
<< " hccl json config failed, please set env 'MINDSPORE_HCCL_CONFIG_PATH' or 'RANK_TABLE_FILE'";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (strlen(config_path_str) >= kPathMax) {
|
||||
MS_LOG(ERROR) << "File path oversize";
|
||||
MS_LOG(ERROR) << "Invalid environment variable 'MINDSPORE_HCCL_CONFIG_PATH' or 'RANK_TABLE_FILE', the path length"
|
||||
<< " should be smaller than " << kPathMax << ", but got " << config_path_str;
|
||||
return false;
|
||||
}
|
||||
std::string rank_id_str = GetRankIdStr();
|
||||
auto full_path = realpath(config_path_str, nullptr);
|
||||
if (full_path == nullptr) {
|
||||
MS_LOG(ERROR) << "File path " << config_path_str << " does not exist";
|
||||
MS_LOG(ERROR) << "Invalid environment variable 'MINDSPORE_HCCL_CONFIG_PATH' or 'RANK_TABLE_FILE', the path is: "
|
||||
<< config_path_str << ". Please check (1) whether the path exists, "
|
||||
<< "(2) whether the path has the access permission, (3) whether the path is too long. ";
|
||||
return false;
|
||||
}
|
||||
MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << rank_id_str;
|
||||
|
|
|
@ -486,7 +486,8 @@ KernelSelectStatus SelectCustomKernelInfo(const CNodePtr &kernel_node, KernelTyp
|
|||
} else if (kCustomTypeAkg.find(func_type) != kCustomTypeAkg.end()) {
|
||||
*kernel_type = KernelType::AKG_KERNEL;
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Unsupported func type [" << func_type << "] for Custom op [" << op_name << "] on Ascend";
|
||||
MS_LOG(EXCEPTION) << "Unsupported func type for Custom op on Ascend, it should be 'tbe', 'ir_builder', "
|
||||
<< "'tvm_compute' or 'hybrid', but got [" << func_type << "] for Custom op [" << op_name << "]";
|
||||
}
|
||||
kernel::OpImplyType imply_type =
|
||||
*kernel_type == KernelType::TBE_KERNEL ? kernel::OpImplyType::kTBE : kernel::OpImplyType::kAKG;
|
||||
|
@ -496,7 +497,9 @@ KernelSelectStatus SelectCustomKernelInfo(const CNodePtr &kernel_node, KernelTyp
|
|||
return kNoMatched;
|
||||
}
|
||||
// If Custom op has not set reg info, then infer info from inputs
|
||||
MS_LOG(WARNING) << "Not find operator information for op[" << op_name << "]. Infer operator information from inputs.";
|
||||
MS_LOG(WARNING) << "Not find operator information for Custom op[" << op_name << "]. "
|
||||
<< "Infer operator information from inputs. For more details, "
|
||||
<< "please refer to 'mindspore.ops.Custom' at https://www.mindspore.cn.";
|
||||
auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
|
||||
builder->SetKernelType(*kernel_type);
|
||||
builder->SetProcessor(kernel::Processor::AICORE);
|
||||
|
|
|
@ -431,13 +431,15 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
|
|||
kernel::CPUKernelRegistrar(op_name, KernelAttr(),
|
||||
[]() { return std::make_shared<kernel::CustomAOTCpuKernel>(); });
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Unsupported func type [" << tp << "] for Custom op [" << op_name << "] on CPU";
|
||||
MS_LOG(EXCEPTION) << "Unsupported func type for Custom op on CPU, it should be 'pyfunc' or 'aot', but got ["
|
||||
<< tp << "] for Custom op [" << op_name << "]";
|
||||
}
|
||||
}
|
||||
// If Custom op has not set reg info, then infer info from inputs
|
||||
if (mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU) == nullptr) {
|
||||
MS_LOG(WARNING) << "Not find operator information for op[" << op_name
|
||||
<< "]. Infer operator information from inputs.";
|
||||
MS_LOG(WARNING) << "Not find operator information for Custom op[" << op_name << "]. "
|
||||
<< "Infer operator information from inputs. For more details, "
|
||||
<< "please refer to 'mindspore.ops.Custom' at https://www.mindspore.cn.";
|
||||
return UpdateCustomKernelBuildInfoAndAttrs(kernel_node);
|
||||
}
|
||||
} else if (IsDynamicParamKernel(op_name)) {
|
||||
|
@ -458,7 +460,8 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
|
|||
MS_LOG(DEBUG) << "Operator[" << op_name << "] will get ops attr info.";
|
||||
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU);
|
||||
if (op_info_ptr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Not find op[" << op_name << "] in cpu";
|
||||
MS_LOG(EXCEPTION) << "Not find op[" << op_name << "] in cpu. For more details, "
|
||||
<< "please refer to the list of supported cpu operations at https://www.mindspore.cn.";
|
||||
}
|
||||
kernel_attrs.clear();
|
||||
kernel::CPUKernelFactory::GetInstance().SetKernelAttrs(op_info_ptr, &kernel_attrs);
|
||||
|
|
|
@ -44,7 +44,7 @@ static std::map<std::string, std::string> GenHcclOptions(uint32_t device_id, std
|
|||
std::string_view rank_file) {
|
||||
auto env_deploy_mode = mindspore::common::GetEnv(kHcclDeployModeEnv);
|
||||
if (env_deploy_mode.empty()) {
|
||||
MS_LOG(WARNING) << kHcclDeployModeEnv << " is not set in ENV. Now set to default value 0";
|
||||
MS_LOG(WARNING) << "The environment variable 'DEPLOY_MODE' is not set. Now set to default value 0";
|
||||
env_deploy_mode = "0";
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue