new hccl interface

Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
This commit is contained in:
zhoufeng 2020-11-09 15:44:55 +08:00
parent e86e990089
commit d532af3a9a
21 changed files with 518 additions and 278 deletions

View File

@ -6,10 +6,15 @@ include(${GE_SOURCE_DIR}/cmake/ge_utils.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/json.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/eigen.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/gtest.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/onnx.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/securec.cmake)
if (ENABLE_D)
set(AS_MS_COMP TRUE)
include(${GE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
unset(AS_MS_COMP)
else ()
include(${GE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
endif ()
# for UT, find slog and error_manager from local prebuild
if (NOT ENABLE_D AND NOT ENABLE_ACL)
set(GE_PREBUILD_PATH ${GE_SOURCE_DIR}/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR})
@ -79,8 +84,12 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILE__='\"$(subst $(realpath ${CMAKE
add_subdirectory(${GE_SOURCE_DIR}/src/common/graph)
if (ENABLE_ACL OR ENABLE_D)
add_subdirectory(${GE_SOURCE_DIR}/src/ge/common)
target_compile_definitions(graph PRIVATE google=ascend_private)
set_target_properties(graph PROPERTIES SKIP_BUILD_RPATH TRUE)
if (ENABLE_D)
add_subdirectory(${GE_SOURCE_DIR}/src/ge/ge_runtime)
target_compile_definitions(ge_runtime PRIVATE google=ascend_private)
set_target_properties(ge_runtime PROPERTIES SKIP_BUILD_RPATH TRUE)
endif ()
endif ()

View File

@ -216,7 +216,7 @@ if (NOT ENABLE_GE)
if (ENABLE_D)
install(
TARGETS ms_profile
TARGETS ms_profile hccl_adapter
DESTINATION ${INSTALL_LIB_DIR}
COMPONENT mindspore
)

@ -1 +1 @@
Subproject commit 412ebe82c96620b5f7c942a7ab87a45bf14c5621
Subproject commit 383f7f751d6612e9dbde9e22a2960098fdbf3792

View File

@ -174,7 +174,7 @@ foreach (_comp ${SUB_COMP})
string(REPLACE "/" "_" sub ${_comp})
if (TARGET _mindspore_${sub}_obj)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
add_dependencies(_mindspore_${sub}_obj proto_input )
add_dependencies(_mindspore_${sub}_obj proto_input)
endif ()
endforeach ()
@ -229,28 +229,26 @@ if (ENABLE_D)
endif()
MESSAGE("USE DAV LIB PATH: ${ASCEND_PATH}")
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH})
find_library(CCE_LIB cce ${ASCEND_RUNTIME_PATH})
find_library(RUNTIME_LIB runtime ${ASCEND_RUNTIME_PATH})
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
find_library(PROFILING msprofiler ${ASCEND_RUNTIME_PATH})
# for atlas env
find_library(HCCL hccl ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(CCE_LIB cce ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(RUNTIME_LIB runtime ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
find_library(DATATRANSFER datatransfer HINTS ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
find_library(PROFILING msprofiler ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(CCE_LIB cce ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(RUNTIME_LIB runtime ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
find_library(PROFILING msprofiler ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(PROFILING_SHARED msprof ${ASCEND_DRIVER_PATH})
find_library(REGISTER register ${ASCEND_RUNTIME_PATH})
find_library(REGISTER register ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(OPTILING optiling ${ASCEND_OPP_PATH})
add_library(ms_profile SHARED ${PROFILING})
set_target_properties(ms_profile PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(ms_profile -Wl,--start-group ${PROFILING_SHARED} ${PROFILING} mindspore::protobuf -Wl,--end-group)
target_link_libraries(mindspore ms_profile ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER} ${REGISTER} -Wl,--no-as-needed ${OPTILING} -Wl,--as-needed)
target_link_libraries(mindspore ms_profile ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER}
${REGISTER} -Wl,--no-as-needed ${OPTILING} -Wl,--as-needed)
target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group)
# hccl_adapter
find_library(HCCL_ADPTER hcom_graph_adaptor ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(HCCL_BUILDER hcom_opskernel_builder ${ASCEND_RUNTIME_PATH}/plugin/opskernel ${ASCEND_TOOLKIT_RUNTIME_PATH}/plugin/opskernel)
add_subdirectory(runtime/hccl_adapter)
target_link_libraries(hccl_adapter PRIVATE mindspore ${HCCL_ADPTER} ${REGISTER} -Wl,--no-as-needed ${HCCL_BUILDER})
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece -Wl,--end-group)
else ()
@ -274,11 +272,14 @@ elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
else ()
MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}")
endif ()
set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib)
if (ENABLE_D)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/fwkacllib/lib64)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/add-ons)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
@ -286,9 +287,16 @@ if (ENABLE_D)
elseif (ENABLE_GPU)
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/cuda/lib64)
endif ()
set(HCCL_ADPT_RPATH ${ORIGIN_PATH}:${MINDSPORE_RPATH})
set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${MINDSPORE_RPATH})
set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
if (ENABLE_D)
set_target_properties(hccl_adapter PROPERTIES INSTALL_RPATH ${HCCL_ADPT_RPATH})
target_link_libraries(_c_expression PRIVATE hccl_adapter)
endif ()
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
target_link_libraries(mindspore mindspore::pybind11_module)
target_link_libraries(mindspore mindspore_gvar)
@ -352,6 +360,7 @@ if (ENABLE_D)
find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
target_link_libraries(_c_expression PRIVATE ${adump_server})
target_link_libraries(inference PRIVATE ${adump_server})
target_link_libraries(inference PRIVATE mindspore_core hccl_adapter)
endif()
if (ENABLE_CPU)

View File

@ -17,16 +17,15 @@
#include "backend/kernel_compiler/hccl/hccl_kernel.h"
#include <map>
#include "runtime/device/ascend/tasksink/runtime_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/utils.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"
#include "runtime/hccl_adapter/hccl_adapter.h"
using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>;
using ge::model_runner::HcclTaskInfo;
using mindspore::device::ascend::tasksink::RuntimeUtils;
namespace {
static std::map<std::string, std::string> kMsOpNameToHcomHcclType = {
@ -145,35 +144,45 @@ const std::vector<size_t> &HcclKernel::GetOutputSizeList() const {
const std::vector<size_t> &HcclKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "Inputs or outputs is empty";
}
stream_id_ = stream_id;
std::string hccl_type = AnfAlgo::GetCNodeName(anf_node_);
MS_EXCEPTION_IF_NULL(inputs.at(0));
auto input_data_addr = inputs.at(0)->addr;
MS_EXCEPTION_IF_NULL(outputs.at(0));
auto output_data_addr = outputs.at(0)->addr;
void *workspace_address = nullptr;
const int64_t workspace_num = 0;
std::vector<uint8_t> private_def;
HcclDataType data_type = hccl_data_type_list_[0];
MS_LOG(INFO) << "HCCL Task : stream_id=" << stream_id << ", ws_num=" << workspace_num << ", count=" << hccl_count_
<< ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_)
<< ", data_type=" << static_cast<int>(data_type);
std::vector<hccl::HcclTaskInfo> task_info;
bool ret = hccl::GenTask(anf_node_, data_type, &task_info);
if (!ret) {
MS_LOG(EXCEPTION) << "Gen Task for " << anf_node_->DebugString() << " failed.";
}
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
HcclTaskInfoPtr task_info_ptr = std::make_shared<HcclTaskInfo>(
kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0,
private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel,
RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump());
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
std::vector<TaskInfoPtr> results;
for (auto &task : task_info) {
MS_LOG(INFO) << "HCCL Task : stream_id=" << stream_id << ", count=" << hccl_count_ << ", root_id=" << root_id_
<< ", op_type=" << static_cast<int>(op_type_) << ", data_type=" << static_cast<int>(data_type)
<< ", workspace_size=" << task.workspace_size << ", stream_num=" << task.stream_num
<< ", private_def_size=" << task.private_def.size();
private_def.resize(task.private_def.size());
auto sec_ret = memcpy_s(private_def.data(), private_def.size(), task.private_def.data(), task.private_def.size());
if (sec_ret != 0) {
MS_LOG(EXCEPTION) << "Set data memcpy_s failed, ret = " << sec_ret;
}
results.emplace_back(std::make_shared<HcclTaskInfo>(
kernel_name_, stream_id, hccl::GetHcclType(anf_node_), input_data_addr, output_data_addr, task.workspace_size,
task.stream_num, private_def, hccl::GetHcclOpsKernelInfoStore(), hccl_count_, root_id_, op_type_, data_type,
group_, NeedDump()));
}
return results;
}
device::DynamicKernelPtr HcclKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {

View File

@ -20,26 +20,10 @@
namespace mindspore {
namespace kernel {
bool HcomAllBroadCastKernel::Launch(const std::vector<AddressPtr> &inputs,
bool HcomAllBroadCastKernel::Launch(const std::vector<AddressPtr> & /*inputs*/,
const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void *stream_ptr) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK)) {
return true;
}
if (inputs.empty() || hccl_data_type_list_.empty()) {
MS_LOG(ERROR) << "BroadCast param is empty";
return false;
}
const char *tag = "Hccl-BroadCast";
MS_EXCEPTION_IF_NULL(inputs[0]);
HcclResult ret =
hcom_broadcast(tag, inputs[0]->addr, hccl_count_, hccl_data_type_list_[0], root_id_, nullptr, stream_ptr);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcomBroadcastOp : hcom_broadcast fail, return: " << static_cast<int>(ret);
return false;
}
const std::vector<AddressPtr> & /*outputs*/, void * /*stream_ptr*/) {
MS_LOG(INFO) << "HcomAllBroadCast launch";
return true;
}
} // namespace kernel

View File

@ -20,24 +20,10 @@
namespace mindspore {
namespace kernel {
bool HcomAllGatherKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK)) {
return true;
}
if (inputs.empty() || hccl_data_type_list_.empty()) {
MS_LOG(ERROR) << "AllGather param is empty";
return false;
}
const char *tag = "Hccl-AllGather";
HcclResult ret =
hcom_all_gather(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], nullptr, stream_ptr);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcomAllGatherKernelOp : hcom_all_gather fail, return: " << static_cast<int>(ret);
return false;
}
bool HcomAllGatherKernel::Launch(const std::vector<AddressPtr> & /*inputs*/,
const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void * /*stream_ptr*/) {
MS_LOG(INFO) << "HcomAllGather launch";
return true;
}
} // namespace kernel

View File

@ -20,24 +20,10 @@
namespace mindspore {
namespace kernel {
bool HcomAllReduceKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK)) {
return true;
}
if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) {
MS_LOG(ERROR) << "AllReduce param is empty";
return false;
}
const char *tag = "Hccl-AllReduce";
HcclResult ret = hcom_all_reduce(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0],
op_type_, nullptr, stream_ptr);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcomAllReduceKernelOp : hcom_all_reduce fail, return: " << static_cast<int>(ret);
return false;
}
bool HcomAllReduceKernel::Launch(const std::vector<AddressPtr> & /*inputs*/,
const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> & /*outputs*/, void * /*stream_ptr*/) {
MS_LOG(INFO) << "HcomAllReduce launch";
return true;
}
} // namespace kernel

View File

@ -20,25 +20,10 @@
namespace mindspore {
namespace kernel {
bool HcomAllReduceScatterKernel::Launch(const std::vector<AddressPtr> &inputs,
bool HcomAllReduceScatterKernel::Launch(const std::vector<AddressPtr> & /*inputs*/,
const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
if (context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK)) {
return true;
}
if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) {
MS_LOG(ERROR) << "ReduceScatter param is empty";
return false;
}
const char *tag = "Hccl-ReduceScatter";
HcclResult ret = hcom_reduce_scatter(tag, inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0],
op_type_, nullptr, stream_ptr);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcomReduceScatterOp : hcom_reduce_scatter fail, return: " << static_cast<int>(ret);
return false;
}
const std::vector<AddressPtr> & /*outputs*/, void * /*stream_ptr*/) {
MS_LOG(INFO) << "HcomAllReduceScatter launch";
return true;
}
} // namespace kernel

View File

@ -55,6 +55,7 @@
#include "profiler/device/ascend/rt_callback_manager.h"
#include "utils/config_manager.h"
#include "runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h"
#include "runtime/hccl_adapter/hccl_adapter.h"
using ge::model_runner::ModelRunner;
using mindspore::device::ascend::ProfilingManager;
@ -796,10 +797,10 @@ bool AscendKernelRuntime::HcclInit() {
return false;
}
MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << rank_id_str;
HcclResult res = hcom_init(full_path, rank_id_str.c_str());
bool ret = hccl::InitHccl(context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID), rank_id_str, full_path);
free(full_path);
if (res != HCCL_SUCCESS) {
MS_LOG(ERROR) << "Hcom init failed, res is " << static_cast<int>(res);
if (!ret) {
MS_LOG(ERROR) << "Hcom init failed.";
return false;
}
return true;
@ -816,12 +817,14 @@ bool AscendKernelRuntime::DestroyHccl() {
if (!HcclExecutorManager::GetInstance().Finalize()) {
MS_LOG(ERROR) << "Dynamic Shape Hccl Finalize Failed";
}
HcclResult res = hcom_destroy();
if (res != HCCL_SUCCESS) {
bool res = hccl::FinalizeHccl();
if (!res) {
MS_LOG(ERROR) << "Hccl destroy failed";
return false;
}
MS_LOG(INFO) << "Hccl destroy successful, status = " << res << ".";
MS_LOG(INFO) << "Hccl destroy successful.";
context_ptr->set_param<bool>(MS_CTX_ENABLE_HCCL, false);
return true;
}
@ -855,7 +858,7 @@ void AscendKernelRuntime::KernelLaunchProfiling(const std::string &kernel_name)
auto try_emplace_ret = stream_id_task_id_op_name_map_.try_emplace(stream_task_pair, kernel_name);
if (!try_emplace_ret.second) {
MS_LOG(WARNING) << "Profiling duplicate key, task_id:" << stream_task_pair.second
<< " stream_id:" << stream_task_pair.first << " name:" << kernel_name;
<< " stream_id:" << stream_task_pair.first << " name:" << kernel_name;
}
if (stream_id_task_id_op_name_map_.size() > kProfilingMaxTaskIdInStream) {
MS_LOG(EXCEPTION) << "Too many profiling data";

View File

@ -1,106 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/device/ascend/tasksink/runtime_utils.h"
#include <string>
#include "hccl/hcom.h"
#include "utils/log_adapter.h"
#include "hccl/hccl_types.h"
#include "utils/utils.h"
constexpr auto kHcomBroadcast = "hcom_broadcast_";
constexpr auto kHcomAllGather = "hcom_all_gather_";
constexpr auto kHcomAllReduce = "hcom_all_reduce_";
constexpr auto kHcomReduceScatter = "hcom_reduce_scatter_";
constexpr auto kUnderline = "_";
namespace mindspore {
namespace device {
namespace ascend {
namespace tasksink {
bool RuntimeUtils::HcomBindModel(rtModel_t model, rtStream_t stream) {
HcclResult ret = hcom_bind_model(model, stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "Call hcom_bind_model failed, ret: 0x" << static_cast<int>(ret);
return false;
}
return true;
}
bool RuntimeUtils::HcomUnbindModel(rtModel_t model) {
HcclResult ret = hcom_unbind_model(model);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "Call hcom_unbind_model failed, ret: 0x" << static_cast<int>(ret);
return false;
}
return true;
}
bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info, rtStream_t stream) {
MS_LOG(INFO) << "hccl distribute start";
MS_EXCEPTION_IF_NULL(task_info);
HcclResult ret;
static uint32_t task_counter = 0;
auto hccl_group = task_info->group();
if (task_info->hccl_type() == kBroadcastOpName) {
// call hcom broadcast interface to run op
const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_broadcast(tag_broadcast.c_str(), task_info->input_data_addr(), static_cast<u64>(task_info->count()),
static_cast<HcclDataType>(task_info->data_type()), static_cast<u32>(task_info->root_id()),
hccl_group.c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast<int>(ret);
return false;
}
} else if (task_info->hccl_type() == kAllGatherOpName) {
// call hcom allgather interface to run op
const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_all_gather(tag_all_gather.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
static_cast<u64>(task_info->count()), static_cast<HcclDataType>(task_info->data_type()),
hccl_group.c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret;
return false;
}
} else if (task_info->hccl_type() == kAllReduceOpName) {
// call hcom allreduce interface to run op
const string tag_all_reduce = kHcomAllReduce + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_all_reduce(tag_all_reduce.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
static_cast<u64>(task_info->count()), static_cast<HcclDataType>(task_info->data_type()),
static_cast<HcclReduceOp>(task_info->op_type()), hccl_group.c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret;
return false;
}
} else if (task_info->hccl_type() == kReduceScatterOpName) {
// call hcom reducescatter interface to run op
const string tag_reduce_scatter =
kHcomReduceScatter + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
static_cast<u64>(task_info->count()), static_cast<HcclDataType>(task_info->data_type()),
static_cast<HcclReduceOp>(task_info->op_type()), hccl_group.c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_reduce_scatter fail, return ret: " << ret;
return false;
}
}
return true;
}
} // namespace tasksink
} // namespace ascend
} // namespace device
} // namespace mindspore

View File

@ -1,39 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TASKSINK_RUNTIME_UTILS_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TASKSINK_RUNTIME_UTILS_H_
#include <memory>
#include "runtime/rt.h"
#include "framework/ge_runtime/task_info.h"
using ge::model_runner::HcclTaskInfo;
namespace mindspore {
namespace device {
namespace ascend {
namespace tasksink {
class RuntimeUtils {
public:
static bool HcomBindModel(rtModel_t model, rtStream_t stream);
static bool HcomUnbindModel(rtModel_t model);
static bool HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info, rtStream_t stream);
};
} // namespace tasksink
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TASKSINK_RUNTIME_UTILS_H_

View File

@ -0,0 +1,8 @@
# Collect every hccl_adapter source. CONFIGURE_DEPENDS (CMake >= 3.12) makes
# the glob re-run at build time, so newly added .cc files are picked up
# without a manual re-configure.
file(GLOB_RECURSE HCCL_ADAPTER_SRC_LIST CONFIGURE_DEPENDS ./*.cc)
# Tag each source with the HCCL adapter submodule id so its log lines are
# attributed to SM_HCCL_ADPT.
set_property(SOURCE ${HCCL_ADAPTER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_HCCL_ADPT)
add_library(hccl_adapter SHARED ${HCCL_ADAPTER_SRC_LIST})
# Generated GE protobuf headers (proto/task.pb.h) live under the build tree.
target_include_directories(hccl_adapter PRIVATE ${CMAKE_BINARY_DIR}/proto/ge)
# Build-order dependency only (headers generated by 'graph' are needed);
# linking is done by the parent CMakeLists.
add_dependencies(hccl_adapter graph)
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
  # Run mindspore_log_init as soon as the shared object is loaded.
  target_link_options(hccl_adapter PRIVATE -Wl,-init,mindspore_log_init)
endif ()

View File

@ -0,0 +1,129 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/hccl_adapter/converter.h"
#include <map>
#include <algorithm>
#include <tuple>
#define google ascend_private
#include "register/ops_kernel_builder_registry.h"
#include "graph/compute_graph.h"
#include "graph/debug/ge_attr_define.h"
#undef google
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/log_adapter.h"
#include "utils/ms_utils.h"
#include "mindspore/core/base/core_ops.h"
#include "transform/graph_ir/util.h"
static constexpr char kGeOpNameHcclAllRudece[] = "HcomAllReduce";
static constexpr char kGeOpNameHcclAllGather[] = "HcomAllGather";
static constexpr char kGeOpNameHcclBroadcast[] = "HcomBroadcast";
static constexpr char kGeOpNameHcclReduceScatter[] = "HcomReduceScatter";
static constexpr char kGeNodeAttrUsedStreamNum[] = "used_stream_num";
static constexpr char kStubDataStructureName[] = "any_name_can_work";
// Map an HCCL collective data type to the corresponding GE data type.
// Throws (MS_LOG(EXCEPTION)) for data types the adapter does not support.
static ge::DataType ConvertHcclDTypeToGeDType(HcclDataType datatype) {
  // Fully qualified std::map: the original unqualified 'map' only compiled
  // because a 'using namespace std' leaked from some header.
  static const std::map<HcclDataType, ge::DataType> kHcomDataTypeMap = {
    {HCCL_DATA_TYPE_FP32, ge::DT_FLOAT},
    {HCCL_DATA_TYPE_FP16, ge::DT_FLOAT16},
    {HCCL_DATA_TYPE_INT8, ge::DT_INT8},
    {HCCL_DATA_TYPE_INT32, ge::DT_INT32},
  };
  auto iter = kHcomDataTypeMap.find(datatype);
  if (iter == kHcomDataTypeMap.end()) {
    MS_LOG(EXCEPTION) << "Unknown hccl data type " << datatype;
  }
  return iter->second;
}
namespace mindspore::hccl {
std::string GetGeNodeName(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
if (IsPrimitiveCNode(cnode, prim::kPrimAllReduce)) {
return kGeOpNameHcclAllRudece;
} else if (IsPrimitiveCNode(cnode, prim::kPrimAllGather)) {
return kGeOpNameHcclAllGather;
} else if (IsPrimitiveCNode(cnode, prim::kPrimBroadcast)) {
return kGeOpNameHcclBroadcast;
} else if (IsPrimitiveCNode(cnode, prim::kPrimReduceScatter)) {
return kGeOpNameHcclReduceScatter;
}
MS_LOG(EXCEPTION) << "Unknown hccl node type " << cnode->DebugString();
}
// Build a throw-away single-node GE graph that mirrors the given mindspore
// hccl CNode: same op type (via GetGeNodeName), one GE input desc per real
// CNode input, the hccl data type attr, and (when present) the rank-size attr.
// Returns both the node and its owning graph — the caller must keep the
// graph alive alongside the node (hence "stub ... to keep reference count",
// see converter.h).
std::tuple<ge::NodePtr, ge::ComputeGraphPtr> GenerateStubGeNode(const AnfNodePtr &anf_node, HcclDataType datatype) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto cnode = anf_node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  std::string ge_node_name = GetGeNodeName(cnode);
  ge::OpDescPtr op_desc = std::make_shared<ge::OpDesc>(kStubDataStructureName, ge_node_name);
  MS_EXCEPTION_IF_NULL(op_desc);
  // Input 0 of a CNode is the primitive itself; tensor inputs start at 1.
  for (size_t i = 1; i < cnode->size(); ++i) {
    auto &input = cnode->input(i);
    std::vector<int64_t> ge_shape;
    auto ms_shape = AnfAlgo::GetOutputInferShape(input, 0);
    // GE shapes use int64_t while mindspore infer shapes use size_t.
    std::transform(ms_shape.begin(), ms_shape.end(), std::back_inserter(ge_shape),
                   [](size_t in) { return static_cast<int64_t>(in); });
    // NOTE(review): format is hard-coded to NCHW — presumably the hccl
    // builder only cares about element count; confirm if formats matter.
    op_desc->AddInputDesc(
      ge::GeTensorDesc(ge::GeShape(ge_shape), ge::Format::FORMAT_NCHW,
                       transform::TransformUtil::ConvertDataType(AnfAlgo::GetOutputInferDataType(input, 0))));
  }
  // set node data type
  bool ret = ge::AttrUtils::SetDataType(*op_desc, ge::HCOM_ATTR_DATA_TYPE, ConvertHcclDTypeToGeDType(datatype));
  if (!ret) {
    MS_LOG(EXCEPTION) << "Set attr " << ge::HCOM_ATTR_DATA_TYPE << " for ge node of " << cnode->DebugString()
                      << " failed.";
  }
  // set rank size (optional attr — only forwarded when the CNode carries it)
  if (AnfAlgo::HasNodeAttr(kAttrRankSize, cnode)) {
    auto rank_size = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrRankSize);
    ret = ge::AttrUtils::SetInt(*op_desc, ge::HCOM_ATTR_RANK_SIZE, rank_size);
    if (!ret) {
      MS_LOG(EXCEPTION) << "Set attr " << ge::HCOM_ATTR_RANK_SIZE << " for ge node of " << cnode->DebugString()
                        << " failed.";
    }
  }
  ge::ComputeGraphPtr ge_graph = std::make_shared<ge::ComputeGraph>(kStubDataStructureName);
  MS_EXCEPTION_IF_NULL(ge_graph);
  auto ge_node = ge_graph->AddNode(op_desc);
  return {ge_node, ge_graph};
}
// Extract the fields mindspore needs from a GE-generated hccl task: the
// serialized private_def bytes, the single workspace size, and the number of
// streams the op will use (read from the "used_stream_num" node attr).
// Throws (MS_LOG(EXCEPTION)) when the op does not have exactly one workspace
// or lacks the stream-num attr.
HcclTaskInfo ParseDomiTask(const ge::OpDescPtr &op, const domi::TaskDef &task_def) {
  MS_EXCEPTION_IF_NULL(op);
  // workspace size — hccl ops are expected to declare exactly one workspace
  auto workspace_sizes = op->GetWorkspaceBytes();
  if (workspace_sizes.size() != 1) {
    MS_LOG(EXCEPTION) << "Unexpected workspace size " << workspace_sizes.size();
  }
  int64_t workspace_size = workspace_sizes[0];
  // stream num
  int64_t stream_num;
  bool ret = ge::AttrUtils::GetInt(*op, kGeNodeAttrUsedStreamNum, stream_num);
  if (!ret) {
    MS_LOG(EXCEPTION) << "Get attr " << kGeNodeAttrUsedStreamNum << " for ge node " << op->GetType() << " failed.";
  }
  // Aggregate-initialize HcclTaskInfo {private_def, workspace_size, stream_num}.
  return {task_def.private_def(), workspace_size, stream_num};
}
} // namespace mindspore::hccl

View File

@ -0,0 +1,38 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_RUNTIME_HCCL_ADAPTER_CONVERTER_H
#define MINDSPORE_RUNTIME_HCCL_ADAPTER_CONVERTER_H
#include <string>
#include <vector>
#include <memory>
#include <tuple>
#define google ascend_private
#include "graph/node.h"
#include "common/opskernel/ops_kernel_info_types.h"
#include "proto/task.pb.h"
#undef google
#include "runtime/hccl_adapter/hccl_adapter.h"
#include "mindspore/core/ir/anf.h"
namespace mindspore::hccl {
// Build a throw-away single-node GE graph mirroring the given hccl CNode.
// return graph ptr to keep reference count
std::tuple<ge::NodePtr, ge::ComputeGraphPtr> GenerateStubGeNode(const AnfNodePtr &anf_node, HcclDataType datatype);
// Extract private_def bytes, workspace size and stream num from a GE task def.
HcclTaskInfo ParseDomiTask(const ge::OpDescPtr &op, const domi::TaskDef &task_def);
// Map a mindspore communication CNode to its GE op type name.
std::string GetGeNodeName(const CNodePtr &cnode);
} // namespace mindspore::hccl
#endif // MINDSPORE_RUNTIME_HCCL_ADAPTER_CONVERTER_H

View File

@ -0,0 +1,165 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/hccl_adapter/hccl_adapter.h"
#include <map>
#include <algorithm>
#define google ascend_private
#include "register/ops_kernel_builder_registry.h"
#include "common/opskernel/ops_kernel_info_store.h"
#include "external/ge/ge_api_types.h"
#undef google
#include "utils/log_adapter.h"
#include "utils/ms_utils.h"
#include "runtime/hccl_adapter/converter.h"
#include "runtime/hccl_adapter/hcom_graph_adaptor.h"
static constexpr const char *kHcclOpsKernelInfoStore = "ops_kernel_info_hccl";
static constexpr const char *kHcclDeployModeEnv = "DEPLOY_MODE";
// following global var, thread safety is not guaranteed
static std::shared_ptr<ge::OpsKernelInfoStore> ops_kernel_info_store = nullptr;
static ge::OpsKernelBuilderPtr ops_kernel_builder = nullptr;
namespace mindspore::hccl {
// Assemble the option map handed to the GE hccl ops kernel builder and info
// store. device_id: local ascend device id; rank_id: this process's rank;
// rank_file: path to the rank table file. DEPLOY_MODE is taken from the
// environment and defaults to "0" with a warning when unset.
static std::map<std::string, std::string> GenHcclOptions(uint32_t device_id, std::string_view rank_id,
                                                         std::string_view rank_file) {
  auto env_deploy_mode = common::GetEnv(kHcclDeployModeEnv);
  if (env_deploy_mode.empty()) {
    MS_LOG(WARNING) << kHcclDeployModeEnv << " is not set in ENV. Now set to default value 0";
    env_deploy_mode = "0";
  }
  // FIX: std::string_view::data() is not guaranteed NUL-terminated, so
  // constructing the map's std::string values from the raw pointer could
  // over-read or truncate. Build the strings from the full (ptr, len) view.
  std::string rank_id_str(rank_id);
  std::string rank_file_str(rank_file);
  return std::map<std::string, std::string>({{ge::OPTION_EXEC_IS_USEHCOM, "1"},
                                             {ge::OPTION_EXEC_IS_USEHVD, "0"},
                                             {ge::OPTION_EXEC_HCCL_FLAG, "1"},
                                             {ge::OPTION_EXEC_DEVICE_ID, std::to_string(device_id)},
                                             {ge::OPTION_EXEC_RANK_ID, rank_id_str},
                                             {ge::OPTION_EXEC_POD_NAME, rank_id_str},
                                             {ge::OPTION_EXEC_RANK_TABLE_FILE, rank_file_str},
                                             {ge::OPTION_GRAPH_RUN_MODE, "1"},
                                             // NOTE(review): HCCL_FLAG is listed twice, as in the
                                             // original; the duplicate is ignored by std::map.
                                             {ge::OPTION_EXEC_HCCL_FLAG, "1"},
                                             {ge::OPTION_EXEC_DEPLOY_MODE, env_deploy_mode}});
}
// Initialize the hccl adapter: locate the single registered hccl ops-kernel builder,
// initialize it, initialize the hcom plugin, then fetch and initialize the hccl
// OpsKernelInfoStore. Populates the file-level globals ops_kernel_builder and
// ops_kernel_info_store. Throws (MS_LOG(EXCEPTION)) on any failure; returns true on success.
bool InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file) {
  MS_LOG(INFO) << "Start init hccl adapter.";
  // get ops_kernel_builder
  std::map<std::string, ge::OpsKernelBuilderPtr> all_builders = ge::OpsKernelBuilderRegistry::GetInstance().GetAll();
  // Exactly one builder (the hccl one) is expected to be registered at this point.
  if (all_builders.size() != 1) {
    MS_LOG(EXCEPTION) << "Builders size should be 1 (hccl builder), but is " << all_builders.size();
  }
  MS_LOG(INFO) << "Get builder " << all_builders.begin()->first;
  ops_kernel_builder = all_builders.begin()->second;
  MS_EXCEPTION_IF_NULL(ops_kernel_builder);
  // init ops_kernel_builder
  auto options = GenHcclOptions(device_id, rank_id, rank_file);
  auto ret = ops_kernel_builder->Initialize(options);
  if (ret != ge::SUCCESS) {
    MS_LOG(EXCEPTION) << "Init builder failed, ret = " << ret;
  }
  // get ops_kernel_info_store
  // ::Initialize is the plugin's C-linkage entry point (see hcom_graph_adaptor.h).
  ret = ::Initialize(options);
  if (ret != ge::SUCCESS) {
    MS_LOG(EXCEPTION) << "Init plugin so failed, ret = " << ret;
  }
  std::map<std::string, std::shared_ptr<ge::OpsKernelInfoStore>> all_ops_kernel_info_stores;
  ::GetOpsKernelInfoStores(all_ops_kernel_info_stores);
  // Pick only the hccl store by its registered name; other stores are ignored.
  for (auto &[name, ptr] : all_ops_kernel_info_stores) {
    if (name == kHcclOpsKernelInfoStore) {
      ops_kernel_info_store = ptr;
      break;
    }
  }
  MS_EXCEPTION_IF_NULL(ops_kernel_info_store);
  ret = ops_kernel_info_store->Initialize(options);
  if (ret != ge::SUCCESS) {
    MS_LOG(EXCEPTION) << "Init info store failed, ret = " << ret;
  }
  MS_LOG(INFO) << "Init hccl adapter success.";
  return true;
}
bool FinalizeHccl() {
MS_LOG(INFO) << "Start destroy hccl adapter.";
if (ops_kernel_info_store != nullptr) {
auto ret = ops_kernel_info_store->Finalize();
if (ret != ge::SUCCESS) {
MS_LOG(ERROR) << "Destory info store failed, ret = " << ret;
return false;
}
}
if (ops_kernel_builder != nullptr) {
auto ret = ops_kernel_builder->Finalize();
if (ret != ge::SUCCESS) {
MS_LOG(ERROR) << "Destory builder failed, ret = " << ret;
return false;
}
}
::Finalize();
ge::OpsKernelBuilderRegistry::GetInstance().UnregisterAll();
ops_kernel_info_store.reset();
ops_kernel_builder.reset();
MS_LOG(INFO) << "Destroy hccl adapter success.";
return true;
}
// Generate the runtime task list for a hccl communication node.
// Builds a stub GE node for `node`, asks the hccl builder to compute its running
// parameters, generates the domi task defs, and converts them into HcclTaskInfo
// entries appended to *task_info_lists (which is cleared first).
// Returns false (with an error log) if CalcOpRunningParam or GenerateTask fails;
// throws via MS_EXCEPTION_IF_NULL on null inputs or uninitialized builder.
bool GenTask(const AnfNodePtr &node, HcclDataType datatype, std::vector<HcclTaskInfo> *task_info_lists) {
  MS_EXCEPTION_IF_NULL(ops_kernel_builder);
  MS_EXCEPTION_IF_NULL(task_info_lists);
  MS_LOG(INFO) << "Start generate task for hccl node " << node->DebugString();
  // ge_graph keeps the stub node alive for the duration of task generation.
  auto [ge_node, ge_graph] = GenerateStubGeNode(node, datatype);
  MS_EXCEPTION_IF_NULL(ge_node);
  auto op = ge_node->GetOpDesc();
  MS_EXCEPTION_IF_NULL(op);
  MS_LOG(INFO) << "Start to call CalcOpRunningParam";
  ge::Status ret = ops_kernel_builder->CalcOpRunningParam(*ge_node);
  if (ret != ge::SUCCESS) {
    MS_LOG(ERROR) << "OpsKernelBuilder CalcOpRunningParam failed, ret = " << ret;
    return false;
  }
  MS_LOG(INFO) << "Start to call GenerateTask";
  // NOTE(review): unused_ctx is default-constructed and presumably ignored by the
  // hccl builder's GenerateTask — confirm against the builder implementation.
  ge::RunContext unused_ctx;
  std::vector<domi::TaskDef> domi_tasks;
  ret = ops_kernel_builder->GenerateTask(*ge_node, unused_ctx, domi_tasks);
  if (ret != ge::SUCCESS) {
    MS_LOG(ERROR) << "OpsKernelBuilder GenerateTask failed, ret = " << ret;
    return false;
  }
  task_info_lists->clear();
  // Convert each domi task into the adapter's HcclTaskInfo representation.
  std::transform(domi_tasks.begin(), domi_tasks.end(), std::back_inserter(*task_info_lists),
                 [&op](const domi::TaskDef &task_def) -> HcclTaskInfo { return ParseDomiTask(op, task_def); });
  MS_LOG(INFO) << "Generate task for node " << node->DebugString() << " success.";
  // Release the stub graph explicitly once tasks have been extracted.
  ge_graph.reset();
  return true;
}
bool CalcOpRunningParam(const AnfNodePtr &node) { return true; }
void *GetHcclOpsKernelInfoStore() { return ops_kernel_info_store.get(); }
std::string GetHcclType(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
return GetGeNodeName(cnode);
}
} // namespace mindspore::hccl

View File

@ -0,0 +1,43 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_RUNTIME_HCCL_ADAPTER_HCCL_ADAPTER_H
#define MINDSPORE_RUNTIME_HCCL_ADAPTER_HCCL_ADAPTER_H
#include <string>
#include <vector>
#include <memory>
#include "mindspore/core/ir/anf.h"
#include "external/hccl/hccl_types.h"
// Export the adapter's entry points with default visibility so they remain
// visible from the shared library; the macro is undefined at end of header.
#define MS_API __attribute__((visibility("default")))
namespace mindspore::hccl {
// Result of generating one hccl task for a communication node.
struct MS_API HcclTaskInfo {
  std::string private_def;  // serialized, opaque task definition
  int64_t workspace_size;   // workspace memory size required by the task
  int64_t stream_num;       // number of streams the task uses
};
// Initialize the hccl adapter for the given device/rank; returns true on success.
MS_API bool InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file);
// Finalize the adapter; returns false if any finalize step fails.
MS_API bool FinalizeHccl();
// Generate runtime tasks for a hccl node into *task_info_lists.
MS_API bool GenTask(const AnfNodePtr &node, HcclDataType datatype, std::vector<HcclTaskInfo> *task_info_lists);
// No-op placeholder; always returns true.
MS_API bool CalcOpRunningParam(const AnfNodePtr &node);
// Type-erased pointer to the hccl OpsKernelInfoStore (null before InitHccl).
MS_API void *GetHcclOpsKernelInfoStore();
// GE node-type name for a hccl communication node.
MS_API std::string GetHcclType(const AnfNodePtr &node);
}  // namespace mindspore::hccl
#undef MS_API
#endif  // MINDSPORE_RUNTIME_HCCL_ADAPTER_HCCL_ADAPTER_H

View File

@ -0,0 +1,32 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_RUNTIME_HCCL_ADAPTER_HCOM_GRAPH_ADAPTOR_H
#define MINDSPORE_RUNTIME_HCCL_ADAPTER_HCOM_GRAPH_ADAPTOR_H
#include <string>
#include <map>
#include <memory>
#include "mindspore/core/ir/anf.h"
#include "common/opskernel/ops_kernel_info_store.h"
// C-linkage entry points of the hcom/hccl plugin library, called from
// hccl_adapter.cc as ::Initialize / ::Finalize / ::GetOpsKernelInfoStores.
extern "C" {
// Initialize the plugin with the GE option map built by the adapter.
ge::Status Initialize(const std::map<std::string, std::string> &);
// Finalize the plugin.
ge::Status Finalize();
// Fill the map with the plugin's registered OpsKernelInfoStores, keyed by name.
void GetOpsKernelInfoStores(std::map<std::string, std::shared_ptr<ge::OpsKernelInfoStore>> &);
}
#endif  // MINDSPORE_RUNTIME_HCCL_ADAPTER_HCOM_GRAPH_ADAPTOR_H

View File

@ -181,7 +181,8 @@ static const char *GetSubModuleName(SubModuleId module_id) {
"VM", // SM_VM
"PROFILER", // SM_PROFILER
"PS", // SM_PS
"LITE" // SM_LITE
"LITE", // SM_LITE
"HCCL_ADPT" // SM_HCCL_ADPT
};
return sub_module_names[module_id % NUM_SUBMODUES];

View File

@ -125,6 +125,7 @@ enum SubModuleId : int {
SM_PROFILER, // profiler
SM_PS, // Parameter Server
SM_LITE, // LITE
SM_HCCL_ADPT, // Hccl Adapter
NUM_SUBMODUES // number of submodules
};

View File

@ -15,7 +15,7 @@
*/
#include <vector>
#include "framework/ge_runtime/model_runner.h"
#include "runtime/device/ascend/tasksink/runtime_utils.h"
#include "runtime/hccl_adapter/hccl_adapter.h"
namespace ge {
namespace model_runner {
@ -60,15 +60,12 @@ const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRunti
} // namespace ge
namespace mindspore {
namespace device {
namespace ascend {
namespace tasksink {
bool RuntimeUtils::HcomBindModel(rtModel_t model, rtStream_t stream) { return true; }
bool RuntimeUtils::HcomUnbindModel(rtModel_t model) { return true; }
bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info, rtStream_t stream) { return true; }
} // namespace tasksink
} // namespace ascend
} // namespace device
namespace hccl {
bool InitHccl(uint32_t, std::string_view, std::string_view) { return true; }
bool FinalizeHccl() { return true; }
bool GenTask(const AnfNodePtr &, HcclDataType, std::vector<HcclTaskInfo> *) { return true; }
bool CalcOpRunningParam(const AnfNodePtr &) { return true; }
void *GetHcclOpsKernelInfoStore() { return nullptr; }
std::string GetHcclType(const AnfNodePtr &) { return ""; }
} // namespace hccl
} // namespace mindspore