forked from mindspore-Ecosystem/mindspore
Delete the gpu_collective shared library (.so) for the cloud side.
This commit is contained in:
parent
9887716115
commit
b7df5799b2
|
@ -176,13 +176,6 @@ if(ENABLE_CPU)
|
|||
endif()
|
||||
|
||||
if(ENABLE_MPI)
|
||||
if(ENABLE_GPU)
|
||||
install(
|
||||
TARGETS _ms_mpi
|
||||
DESTINATION ${INSTALL_BASE_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
if(ENABLE_CPU)
|
||||
install(
|
||||
TARGETS mpi_adapter
|
||||
|
@ -206,11 +199,6 @@ endif()
|
|||
|
||||
if(ENABLE_GPU)
|
||||
if(ENABLE_MPI)
|
||||
install(
|
||||
TARGETS gpu_collective
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
install(
|
||||
TARGETS nvidia_collective
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
|
|
|
@ -133,13 +133,6 @@ if(ENABLE_CPU)
|
|||
endif()
|
||||
|
||||
if(ENABLE_MPI)
|
||||
if(ENABLE_GPU)
|
||||
install(
|
||||
TARGETS _ms_mpi
|
||||
DESTINATION ${INSTALL_BASE_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
if(ENABLE_CPU)
|
||||
install(
|
||||
TARGETS mpi_adapter
|
||||
|
@ -150,13 +143,6 @@ if(ENABLE_MPI)
|
|||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
if(ENABLE_MPI)
|
||||
install(
|
||||
TARGETS gpu_collective
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
install(
|
||||
TARGETS gpu_queue
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
|
|
|
@ -185,13 +185,6 @@ if(ENABLE_CPU)
|
|||
endif()
|
||||
|
||||
if(ENABLE_MPI)
|
||||
if(ENABLE_GPU)
|
||||
install(
|
||||
TARGETS _ms_mpi
|
||||
DESTINATION ${INSTALL_BASE_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
if(ENABLE_CPU)
|
||||
install(
|
||||
TARGETS mpi_adapter
|
||||
|
@ -208,11 +201,6 @@ endif()
|
|||
|
||||
if(ENABLE_GPU)
|
||||
if(ENABLE_MPI)
|
||||
install(
|
||||
TARGETS gpu_collective
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
install(
|
||||
TARGETS nvidia_collective
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
#include "include/common/utils/parallel_context.h"
|
||||
#include "frontend/parallel/costmodel_context.h"
|
||||
#include "frontend/optimizer/ad/bprop_utils.h"
|
||||
#include "runtime/collective/gpu_collective_init.h"
|
||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
||||
#include "ps/util.h"
|
||||
#endif
|
||||
|
@ -455,15 +454,6 @@ PYBIND11_MODULE(_c_expression, m) {
|
|||
.def(py::init())
|
||||
.def_static("reg_op", &OpLib::RegOp, "Register op info.");
|
||||
|
||||
(void)m.def("init_gpu_collective", &mindspore::device::gpu::GPUCollectiveInitializer::InitCollective,
|
||||
"Init gpu collective communication mode.");
|
||||
(void)m.def("finalize_gpu_collective", &mindspore::device::gpu::GPUCollectiveInitializer::FinalizeCollective,
|
||||
"Finalize gpu collective communication mode.");
|
||||
(void)m.def("get_rank_id", &mindspore::device::gpu::GPUCollectiveInitializer::GetRankID,
|
||||
"Finalize gpu collective communication mode.");
|
||||
(void)m.def("get_rank_size", &mindspore::device::gpu::GPUCollectiveInitializer::GetRankSize,
|
||||
"Finalize gpu collective communication mode.");
|
||||
|
||||
(void)py::class_<CollectiveManager, std::shared_ptr<CollectiveManager>>(m, "CollectiveManager")
|
||||
.def_static("get_instance", &CollectiveManager::instance, "Get collective manager instance.")
|
||||
.def("create_group", &CollectiveManager::CreateCommunicationGroup, "Create collective group.")
|
||||
|
|
|
@ -90,8 +90,6 @@ endif()
|
|||
|
||||
if(ENABLE_GPU)
|
||||
if(ENABLE_MPI)
|
||||
set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
|
||||
set_target_properties(nvidia_collective PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
|
||||
set_target_properties(gpu_collective PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -10,7 +10,6 @@ if(ENABLE_GPU)
|
|||
list(APPEND DEVICE_SRC_LIST ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/data_queue/data_queue.h)
|
||||
file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
list(REMOVE_ITEM DEVICE_SRC_LIST
|
||||
"mpi/mpi_initializer.cc"
|
||||
"distribution/collective_wrapper.cc"
|
||||
"distribution/mpi_wrapper.cc"
|
||||
"distribution/nccl_wrapper.cc"
|
||||
|
@ -29,13 +28,6 @@ if(ENABLE_GPU)
|
|||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
if(ENABLE_MPI)
|
||||
set_property(SOURCE "mpi/mpi_initializer.cc"
|
||||
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||
pybind11_add_module(_ms_mpi NO_EXTRAS "mpi/mpi_initializer.cc")
|
||||
target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cu")
|
||||
set_property(SOURCE ${CUDA_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||
|
||||
|
@ -47,21 +39,6 @@ if(ENABLE_GPU)
|
|||
list(REMOVE_DUPLICATES CUDA_NVCC_FLAGS)
|
||||
cuda_add_library(gpu_hash_table STATIC ${CUDA_SRC_LIST})
|
||||
endif()
|
||||
|
||||
set(GPU_COLLECTIVE_SRCS "distribution/collective_wrapper.cc"
|
||||
"distribution/mpi_wrapper.cc"
|
||||
"distribution/nccl_wrapper.cc")
|
||||
|
||||
|
||||
if(ENABLE_MPI)
|
||||
include(ExternalProject)
|
||||
# gpu_collective
|
||||
set_property(SOURCE ${GPU_COLLECTIVE_SRCS}
|
||||
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||
add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS})
|
||||
target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
|
||||
target_link_libraries(_ms_mpi PRIVATE gpu_collective)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_property(SOURCE ${DEVICE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||
|
|
|
@ -1,44 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "plugin/device/gpu/hal/device/mpi/mpi_initializer.h"
|
||||
#include <dlfcn.h>
|
||||
#include <mpi.h>
|
||||
#include <pybind11/operators.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace gpu {
|
||||
MPIInitializer &MPIInitializer::GetInstance() {
|
||||
static MPIInitializer instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
int MPIInitializer::get_rank_id(const std::string &group) { return GetRankIDByGroup(group); }
|
||||
|
||||
int MPIInitializer::get_rank_size(const std::string &group) { return GetGroupSize(group); }
|
||||
|
||||
PYBIND11_MODULE(_ms_mpi, mpi_initializer) {
|
||||
mpi_initializer.doc() = "mindspore mpi python wrapper";
|
||||
mpi_initializer.def("get_rank_id", &MPIInitializer::get_rank_id, "get rank id");
|
||||
mpi_initializer.def("get_rank_size", &MPIInitializer::get_rank_size, "get rank size");
|
||||
}
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -1,42 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_MPI_MPI_INITIALIZER_H_
|
||||
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_MPI_MPI_INITIALIZER_H_
|
||||
|
||||
#include <string>
|
||||
#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace gpu {
|
||||
class MPIInitializer {
|
||||
public:
|
||||
MPIInitializer(MPIInitializer const &) = delete;
|
||||
MPIInitializer &operator=(const MPIInitializer &) = delete;
|
||||
static MPIInitializer &GetInstance();
|
||||
static int get_rank_id(const std::string &group);
|
||||
static int get_rank_size(const std::string &groups);
|
||||
|
||||
private:
|
||||
MPIInitializer() = default;
|
||||
~MPIInitializer() = default;
|
||||
};
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_MPI_MPI_INITIALIZER_H_
|
|
@ -18,7 +18,6 @@
|
|||
#include <cuda.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "plugin/device/gpu/hal/device/distribution/collective_init.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
|
@ -61,18 +60,6 @@ int GPUDeprecatedInterface::GetGPUMultiProcessorCount() {
|
|||
}
|
||||
return sm_count;
|
||||
}
|
||||
|
||||
void GPUDeprecatedInterface::GPUInitCollective() { CollectiveInitializer::InitCollective(); }
|
||||
|
||||
void GPUDeprecatedInterface::GPUFinalizeCollective() { CollectiveInitializer::FinalizeCollective(); }
|
||||
|
||||
uint32_t GPUDeprecatedInterface::GPUGetRankID(const std::string &group_name) {
|
||||
return CollectiveInitializer::GetRankID(group_name);
|
||||
}
|
||||
|
||||
uint32_t GPUDeprecatedInterface::GPUGetRankSize(const std::string &group_name) {
|
||||
return CollectiveInitializer::GetRankSize(group_name);
|
||||
}
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -35,10 +35,6 @@ class GPUDeprecatedInterface : public DeprecatedInterface {
|
|||
int GetGPUCapabilityMajor() override;
|
||||
int GetGPUCapabilityMinor() override;
|
||||
int GetGPUMultiProcessorCount() override;
|
||||
void GPUInitCollective() override;
|
||||
void GPUFinalizeCollective() override;
|
||||
uint32_t GPUGetRankID(const std::string &group_name) override;
|
||||
uint32_t GPUGetRankSize(const std::string &group_name) override;
|
||||
};
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
|
|
|
@ -20,14 +20,8 @@
|
|||
namespace mindspore {
|
||||
namespace kernel {
|
||||
void NcclGpuKernelMod::SelectCollectiveHandle() {
|
||||
use_mpi_ = false;
|
||||
if (use_mpi_) {
|
||||
collective_handle_ = device::gpu::CollectiveInitializer::instance().collective_handle();
|
||||
MS_EXCEPTION_IF_NULL(collective_handle_);
|
||||
} else {
|
||||
if (!LoadNvidiaCommLib()) {
|
||||
MS_LOG(EXCEPTION) << "Failed to load nivdia communication library.";
|
||||
}
|
||||
if (!LoadNvidiaCommLib()) {
|
||||
MS_LOG(EXCEPTION) << "Failed to load nivdia communication library.";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -46,128 +40,65 @@ bool NcclGpuKernelMod::LoadNvidiaCommLib() {
|
|||
|
||||
bool NcclGpuKernelMod::AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type,
|
||||
ncclRedOp_t reduce_op, cudaStream_t stream, const std::string &group_name) {
|
||||
if (use_mpi_) {
|
||||
auto all_reduce_funcptr =
|
||||
reinterpret_cast<kernel::AllReduce>(dlsym(const_cast<void *>(collective_handle_), "AllReduce"));
|
||||
MS_EXCEPTION_IF_NULL(all_reduce_funcptr);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(
|
||||
kernel_node_, (*all_reduce_funcptr)(input_addr, output_addr, count, data_type, reduce_op, stream, group_name),
|
||||
"ncclAllReduce failed");
|
||||
} else {
|
||||
auto allreduce_func = DlsymFuncObj(AllReduce, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
|
||||
allreduce_func(input_addr, output_addr, count, data_type, reduce_op, group_name, stream),
|
||||
"ncclAllReduce failed");
|
||||
}
|
||||
auto allreduce_func = DlsymFuncObj(AllReduce, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
|
||||
allreduce_func(input_addr, output_addr, count, data_type, reduce_op, group_name, stream),
|
||||
"ncclAllReduce failed");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NcclGpuKernelMod::AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type,
|
||||
cudaStream_t stream, const std::string &group_name) {
|
||||
if (use_mpi_) {
|
||||
auto all_gather_funcptr =
|
||||
reinterpret_cast<kernel::AllGather>(dlsym(const_cast<void *>(collective_handle_), "AllGather"));
|
||||
MS_EXCEPTION_IF_NULL(all_gather_funcptr);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
|
||||
(*all_gather_funcptr)(input_addr, output_addr, count, data_type, stream, group_name),
|
||||
"ncclAllGather failed");
|
||||
} else {
|
||||
auto allgather_func = DlsymFuncObj(AllGather, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
|
||||
allgather_func(input_addr, output_addr, count, data_type, group_name, stream),
|
||||
"ncclAllGather failed");
|
||||
}
|
||||
auto allgather_func = DlsymFuncObj(AllGather, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
|
||||
allgather_func(input_addr, output_addr, count, data_type, group_name, stream),
|
||||
"ncclAllGather failed");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NcclGpuKernelMod::ReduceScatter(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type,
|
||||
ncclRedOp_t reduce_op, cudaStream_t stream, const std::string &group_name) {
|
||||
if (use_mpi_) {
|
||||
auto reduce_scatter_funcptr =
|
||||
reinterpret_cast<kernel::ReduceScatter>(dlsym(const_cast<void *>(collective_handle_), "ReduceScatter"));
|
||||
MS_EXCEPTION_IF_NULL(reduce_scatter_funcptr);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(
|
||||
kernel_node_, (*reduce_scatter_funcptr)(input_addr, output_addr, count, data_type, reduce_op, stream, group_name),
|
||||
"ncclReduceScatter failed");
|
||||
} else {
|
||||
auto reducescatter_func = DlsymFuncObj(ReduceScatter, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(
|
||||
kernel_node_, reducescatter_func(input_addr, output_addr, count, data_type, reduce_op, group_name, stream),
|
||||
"ncclReduceScatter failed");
|
||||
}
|
||||
auto reducescatter_func = DlsymFuncObj(ReduceScatter, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(
|
||||
kernel_node_, reducescatter_func(input_addr, output_addr, count, data_type, reduce_op, group_name, stream),
|
||||
"ncclReduceScatter failed");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NcclGpuKernelMod::Broadcast(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type,
|
||||
int root, cudaStream_t stream, const std::string &group_name) {
|
||||
if (use_mpi_) {
|
||||
auto broadcast_funcptr =
|
||||
reinterpret_cast<kernel::Broadcast>(dlsym(const_cast<void *>(collective_handle_), "Broadcast"));
|
||||
MS_EXCEPTION_IF_NULL(broadcast_funcptr);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(
|
||||
kernel_node_, (*broadcast_funcptr)(input_addr, output_addr, count, data_type, root, stream, group_name),
|
||||
"ncclBroadcast failed");
|
||||
} else {
|
||||
auto broadcast_func = DlsymFuncObj(Broadcast, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
|
||||
broadcast_func(input_addr, output_addr, count, data_type, root, group_name, stream),
|
||||
"ncclBroadcast failed");
|
||||
}
|
||||
auto broadcast_func = DlsymFuncObj(Broadcast, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
|
||||
broadcast_func(input_addr, output_addr, count, data_type, root, group_name, stream),
|
||||
"ncclBroadcast failed");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NcclGpuKernelMod::Send(const void *send_addr, size_t count, ncclDataType_t data_type, int peer_rank,
|
||||
cudaStream_t stream, const std::string &group_name) {
|
||||
if (use_mpi_) {
|
||||
auto nccl_send_func = reinterpret_cast<kernel::Send>(dlsym(const_cast<void *>(collective_handle_), "Send"));
|
||||
MS_EXCEPTION_IF_NULL(nccl_send_func);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(
|
||||
kernel_node_, (*nccl_send_func)(send_addr, count, data_type, peer_rank, stream, group_name), "ncclSend failed");
|
||||
} else {
|
||||
auto send_func = DlsymFuncObj(Send, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, send_func(send_addr, count, data_type, peer_rank, group_name, stream),
|
||||
"ncclSend failed");
|
||||
}
|
||||
auto send_func = DlsymFuncObj(Send, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, send_func(send_addr, count, data_type, peer_rank, group_name, stream),
|
||||
"ncclSend failed");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NcclGpuKernelMod::Recv(void *recv_addr, size_t count, ncclDataType_t data_type, int peer_rank, cudaStream_t stream,
|
||||
const std::string &group_name) {
|
||||
if (use_mpi_) {
|
||||
auto nccl_recv_func = reinterpret_cast<kernel::Recv>(dlsym(const_cast<void *>(collective_handle_), "Recv"));
|
||||
MS_EXCEPTION_IF_NULL(nccl_recv_func);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(
|
||||
kernel_node_, (*nccl_recv_func)(recv_addr, count, data_type, peer_rank, stream, group_name), "ncclRecv failed");
|
||||
} else {
|
||||
auto recv_func = DlsymFuncObj(Recv, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, recv_func(recv_addr, count, data_type, peer_rank, group_name, stream),
|
||||
"ncclRecv failed");
|
||||
}
|
||||
auto recv_func = DlsymFuncObj(Recv, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, recv_func(recv_addr, count, data_type, peer_rank, group_name, stream),
|
||||
"ncclRecv failed");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NcclGpuKernelMod::GroupStart() {
|
||||
if (use_mpi_) {
|
||||
auto nccl_gstart_func =
|
||||
reinterpret_cast<kernel::GroupStart>(dlsym(const_cast<void *>(collective_handle_), "GroupStart"));
|
||||
MS_EXCEPTION_IF_NULL(nccl_gstart_func);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, (*nccl_gstart_func)(), "ncclGroupStart failed");
|
||||
} else {
|
||||
auto groupstart_func = DlsymFuncObj(GroupStart, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, groupstart_func(), "ncclGroupStart failed");
|
||||
}
|
||||
auto groupstart_func = DlsymFuncObj(GroupStart, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, groupstart_func(), "ncclGroupStart failed");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NcclGpuKernelMod::GroupEnd() {
|
||||
if (use_mpi_) {
|
||||
auto nccl_gend_func = reinterpret_cast<kernel::GroupEnd>(dlsym(const_cast<void *>(collective_handle_), "GroupEnd"));
|
||||
MS_EXCEPTION_IF_NULL(nccl_gend_func);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, (*nccl_gend_func)(), "ncclGroupEnd failed");
|
||||
} else {
|
||||
auto groupend_func = DlsymFuncObj(GroupEnd, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, groupend_func(), "ncclGroupEnd failed");
|
||||
}
|
||||
auto groupend_func = DlsymFuncObj(GroupEnd, nvidia_collective_handle_);
|
||||
CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, groupend_func(), "ncclGroupEnd failed");
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
#include "plugin/device/gpu/kernel/gpu_kernel.h"
|
||||
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
|
||||
#include "plugin/device/gpu/kernel/kernel_constants.h"
|
||||
#include "plugin/device/gpu/hal/device/distribution/collective_init.h"
|
||||
#include "plugin/device/gpu/hal/hardware/nvidia_collective_comm_lib.h"
|
||||
#include "runtime/collective/collective_comm_lib_loader.h"
|
||||
|
||||
|
|
|
@ -1,89 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/collective/gpu_collective_init.h"
|
||||
#include "runtime/hardware/device_context_manager.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace gpu {
|
||||
void GPUCollectiveInitializer::InitCollective() {
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
|
||||
if (device_target != kGPUDevice) {
|
||||
MS_LOG(EXCEPTION) << "You are trying to call 'init('nccl')', Please check "
|
||||
"this MindSpore package is GPU version and built with NCCL.";
|
||||
}
|
||||
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
|
||||
{kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
auto deprecated_ptr = device_context->GetDeprecatedInterface();
|
||||
MS_EXCEPTION_IF_NULL(deprecated_ptr);
|
||||
deprecated_ptr->GPUInitCollective();
|
||||
}
|
||||
|
||||
void GPUCollectiveInitializer::FinalizeCollective() {
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
|
||||
if (device_target != kGPUDevice) {
|
||||
MS_LOG(EXCEPTION) << "You are trying to call 'finalize('nccl')', Please check "
|
||||
"this MindSpore package is GPU version and built with NCCL.";
|
||||
}
|
||||
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
|
||||
{kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
auto deprecated_ptr = device_context->GetDeprecatedInterface();
|
||||
MS_EXCEPTION_IF_NULL(deprecated_ptr);
|
||||
deprecated_ptr->GPUFinalizeCollective();
|
||||
}
|
||||
|
||||
uint32_t GPUCollectiveInitializer::GetRankID(const std::string &group_name) {
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
|
||||
if (device_target != kGPUDevice) {
|
||||
MS_LOG(EXCEPTION) << "You are trying to call 'GetRankID', Please check "
|
||||
"this MindSpore package is GPU version and built with NCCL.";
|
||||
}
|
||||
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
|
||||
{kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
auto deprecated_ptr = device_context->GetDeprecatedInterface();
|
||||
MS_EXCEPTION_IF_NULL(deprecated_ptr);
|
||||
return deprecated_ptr->GPUGetRankID(group_name);
|
||||
}
|
||||
|
||||
uint32_t GPUCollectiveInitializer::GetRankSize(const std::string &group_name) {
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
|
||||
if (device_target != kGPUDevice) {
|
||||
MS_LOG(EXCEPTION) << "You are trying to call 'GetRankSize', Please check "
|
||||
"this MindSpore package is GPU version and built with NCCL.";
|
||||
}
|
||||
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
|
||||
{kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
|
||||
MS_EXCEPTION_IF_NULL(device_context);
|
||||
auto deprecated_ptr = device_context->GetDeprecatedInterface();
|
||||
MS_EXCEPTION_IF_NULL(deprecated_ptr);
|
||||
return deprecated_ptr->GPUGetRankSize(group_name);
|
||||
}
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -1,41 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
|
||||
#define MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
|
||||
|
||||
#include <string>
|
||||
#include "include/backend/visible.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace gpu {
|
||||
class BACKEND_EXPORT GPUCollectiveInitializer {
|
||||
public:
|
||||
GPUCollectiveInitializer() = default;
|
||||
~GPUCollectiveInitializer() = default;
|
||||
GPUCollectiveInitializer(GPUCollectiveInitializer const &) = delete;
|
||||
GPUCollectiveInitializer &operator=(const GPUCollectiveInitializer &) = delete;
|
||||
static void InitCollective();
|
||||
static void FinalizeCollective();
|
||||
static uint32_t GetRankID(const std::string &group_name);
|
||||
static uint32_t GetRankSize(const std::string &group_name);
|
||||
};
|
||||
} // namespace gpu
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
|
|
@ -61,10 +61,6 @@ class DeprecatedInterface {
|
|||
virtual int GetGPUCapabilityMajor() { return -1; }
|
||||
virtual int GetGPUCapabilityMinor() { return -1; }
|
||||
virtual int GetGPUMultiProcessorCount() { return -1; }
|
||||
virtual void GPUInitCollective() {}
|
||||
virtual void GPUFinalizeCollective() {}
|
||||
virtual uint32_t GPUGetRankID(const std::string &group_name) { return 0; }
|
||||
virtual uint32_t GPUGetRankSize(const std::string &group_name) { return 0; }
|
||||
};
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
|
Loading…
Reference in New Issue