forked from mindspore-Ecosystem/mindspore
!618 clean ascend runtime codex
Merge pull request !618 from caifubi/clean-runtime-codex
This commit is contained in:
commit
bfab0bc856
|
@ -453,25 +453,26 @@ bool AscendKernelRuntime::HcclInit() {
|
|||
}
|
||||
|
||||
MS_LOG(INFO) << "do hcom init";
|
||||
std::string path;
|
||||
const char *config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH");
|
||||
if (config_path_str == nullptr) {
|
||||
MS_LOG(ERROR) << "get hccl json config failed, please set env MINDSPORE_HCCL_CONFIG_PATH";
|
||||
return false;
|
||||
}
|
||||
path = config_path_str;
|
||||
char fullPath[PATH_MAX] = {0};
|
||||
if (path.size() > PATH_MAX || realpath(path.c_str(), fullPath) == nullptr) {
|
||||
MS_LOG(ERROR) << "file " << path << " is not exist";
|
||||
auto full_path = realpath(config_path_str, nullptr);
|
||||
if (full_path == nullptr) {
|
||||
MS_LOG(ERROR) << "file path " << config_path_str << " does not exist";
|
||||
return false;
|
||||
}
|
||||
|
||||
const char *identify = std::getenv("RANK_ID");
|
||||
if (identify == nullptr) {
|
||||
MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID";
|
||||
free(full_path);
|
||||
return false;
|
||||
}
|
||||
MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << fullPath << ", RANK_ID: " << identify;
|
||||
hcclResult_t res = hcom_init(fullPath, identify);
|
||||
MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify;
|
||||
hcclResult_t res = hcom_init(full_path, identify);
|
||||
free(full_path);
|
||||
if (res != HCCL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "hcom init failed, res is " << static_cast<int>(res);
|
||||
return false;
|
||||
|
|
|
@ -33,7 +33,7 @@ constexpr char kIterEndNode[] = "PROFILING_ITER_END";
|
|||
std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
|
||||
uint32_t ProfilingUtils::custom_node_index_ = 1;
|
||||
|
||||
ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr) {
|
||||
ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(const NotNull<session::KernelGraph *> graph_ptr) {
|
||||
MS_LOG(INFO) << "get env start";
|
||||
custom_node_index_ = 1;
|
||||
auto &cnode_exec_order = graph_ptr->execution_order();
|
||||
|
|
|
@ -94,7 +94,7 @@ class ProfilingUtils {
|
|||
// And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode'
|
||||
// GetNext, export PROFILING_CUSTOM_2='full name of GetNext cnode'
|
||||
// The variable i in PROFILING_CUSTOM_i should start from 1 without interruption.
|
||||
static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr);
|
||||
static ProfilingTraceInfo GetProfilingTraceFromEnv(const NotNull<session::KernelGraph *> graph_ptr);
|
||||
|
||||
// Insert two profiling trace points, one in front and one behind
|
||||
static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
||||
|
|
|
@ -121,8 +121,10 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
|
|||
LaunchAddrCleanKernel(anf_node_ptr, &kernel_inputs);
|
||||
}
|
||||
|
||||
std::vector<TaskInfoPtr> task_info_ptrs = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod)
|
||||
->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
|
||||
auto ascend_kernel_mod = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod);
|
||||
MS_EXCEPTION_IF_NULL(ascend_kernel_mod);
|
||||
std::vector<TaskInfoPtr> task_info_ptrs =
|
||||
ascend_kernel_mod->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
|
||||
task_info_list->insert(task_info_list->end(), task_info_ptrs.begin(), task_info_ptrs.end());
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -136,7 +136,7 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector<hcclDataTyp
|
|||
}
|
||||
}
|
||||
|
||||
if (total_size % type_size != 0) {
|
||||
if (type_size == 0 || total_size % type_size != 0) {
|
||||
MS_LOG(ERROR) << "Total_size[" << total_size << "],Type_size[" << type_size << "] != 0, fail!";
|
||||
return false;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue