Delete the gpu_collective shared library (.so) for the cloud side.

2022-11-11 22:36:29 +08:00 · 2022-11-11 22:36:29 +08:00 · b7df5799b2
parent 9887716115
commit b7df5799b2
15 changed files with 28 additions and 408 deletions
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@ -176,13 +176,6 @@ if(ENABLE_CPU)
 endif()

 if(ENABLE_MPI)
-    if(ENABLE_GPU)
-        install(
-            TARGETS _ms_mpi
-            DESTINATION ${INSTALL_BASE_DIR}
-            COMPONENT mindspore
-        )
-    endif()
    if(ENABLE_CPU)
        install(
            TARGETS mpi_adapter
@ -206,11 +199,6 @@ endif()

 if(ENABLE_GPU)
    if(ENABLE_MPI)
-        install(
-            TARGETS gpu_collective
-            DESTINATION ${INSTALL_LIB_DIR}
-            COMPONENT mindspore
-        )
        install(
          TARGETS nvidia_collective
          DESTINATION ${INSTALL_LIB_DIR}
--- a/cmake/package_mac.cmake
+++ b/cmake/package_mac.cmake
@ -133,13 +133,6 @@ if(ENABLE_CPU)
 endif()

 if(ENABLE_MPI)
-    if(ENABLE_GPU)
-        install(
-            TARGETS _ms_mpi
-            DESTINATION ${INSTALL_BASE_DIR}
-            COMPONENT mindspore
-        )
-  endif()
    if(ENABLE_CPU)
        install(
            TARGETS mpi_adapter
@ -150,13 +143,6 @@ if(ENABLE_MPI)
 endif()

 if(ENABLE_GPU)
-    if(ENABLE_MPI)
-        install(
-            TARGETS gpu_collective
-            DESTINATION ${INSTALL_LIB_DIR}
-            COMPONENT mindspore
-        )
-    endif()
    install(
        TARGETS gpu_queue
        DESTINATION ${INSTALL_LIB_DIR}
--- a/cmake/package_win.cmake
+++ b/cmake/package_win.cmake
@ -185,13 +185,6 @@ if(ENABLE_CPU)
 endif()

 if(ENABLE_MPI)
-  if(ENABLE_GPU)
-    install(
-      TARGETS _ms_mpi
-      DESTINATION ${INSTALL_BASE_DIR}
-      COMPONENT mindspore
-    )
-  endif()
  if(ENABLE_CPU)
    install(
      TARGETS mpi_adapter
@ -208,11 +201,6 @@ endif()

 if(ENABLE_GPU)
  if(ENABLE_MPI)
-    install(
-      TARGETS gpu_collective
-      DESTINATION ${INSTALL_LIB_DIR}
-      COMPONENT mindspore
-    )
    install(
      TARGETS nvidia_collective
      DESTINATION ${INSTALL_LIB_DIR}
--- a/mindspore/ccsrc/pipeline/jit/init.cc
+++ b/mindspore/ccsrc/pipeline/jit/init.cc
@ -32,7 +32,6 @@
 #include "include/common/utils/parallel_context.h"
 #include "frontend/parallel/costmodel_context.h"
 #include "frontend/optimizer/ad/bprop_utils.h"
-#include "runtime/collective/gpu_collective_init.h"
 #if ((defined ENABLE_CPU) && (!defined _WIN32))
 #include "ps/util.h"
 #endif
@ -455,15 +454,6 @@ PYBIND11_MODULE(_c_expression, m) {
    .def(py::init())
    .def_static("reg_op", &OpLib::RegOp, "Register op info.");

-  (void)m.def("init_gpu_collective", &mindspore::device::gpu::GPUCollectiveInitializer::InitCollective,
-              "Init gpu collective communication mode.");
-  (void)m.def("finalize_gpu_collective", &mindspore::device::gpu::GPUCollectiveInitializer::FinalizeCollective,
-              "Finalize gpu collective communication mode.");
-  (void)m.def("get_rank_id", &mindspore::device::gpu::GPUCollectiveInitializer::GetRankID,
-              "Finalize gpu collective communication mode.");
-  (void)m.def("get_rank_size", &mindspore::device::gpu::GPUCollectiveInitializer::GetRankSize,
-              "Finalize gpu collective communication mode.");
-
  (void)py::class_<CollectiveManager, std::shared_ptr<CollectiveManager>>(m, "CollectiveManager")
    .def_static("get_instance", &CollectiveManager::instance, "Get collective manager instance.")
    .def("create_group", &CollectiveManager::CreateCommunicationGroup, "Create collective group.")
--- a/mindspore/ccsrc/plugin/device/gpu/CMakeLists.txt
+++ b/mindspore/ccsrc/plugin/device/gpu/CMakeLists.txt
@ -90,8 +90,6 @@ endif()

 if(ENABLE_GPU)
    if(ENABLE_MPI)
-        set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
        set_target_properties(nvidia_collective PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
-        set_target_properties(gpu_collective PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
    endif()
 endif()
--- a/mindspore/ccsrc/plugin/device/gpu/hal/device/CMakeLists.txt
+++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/CMakeLists.txt
@ -10,7 +10,6 @@ if(ENABLE_GPU)
    list(APPEND DEVICE_SRC_LIST ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/data_queue/data_queue.h)
    file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
    list(REMOVE_ITEM DEVICE_SRC_LIST
-            "mpi/mpi_initializer.cc"
            "distribution/collective_wrapper.cc"
            "distribution/mpi_wrapper.cc"
            "distribution/nccl_wrapper.cc"
@ -29,13 +28,6 @@ if(ENABLE_GPU)
 endif()

 if(ENABLE_GPU)
-    if(ENABLE_MPI)
-        set_property(SOURCE "mpi/mpi_initializer.cc"
-                PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
-        pybind11_add_module(_ms_mpi NO_EXTRAS "mpi/mpi_initializer.cc")
-        target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
-    endif()
-
    file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}  "*.cu")
    set_property(SOURCE ${CUDA_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)

@ -47,21 +39,6 @@ if(ENABLE_GPU)
        list(REMOVE_DUPLICATES CUDA_NVCC_FLAGS)
        cuda_add_library(gpu_hash_table STATIC ${CUDA_SRC_LIST})
    endif()
-
-    set(GPU_COLLECTIVE_SRCS "distribution/collective_wrapper.cc"
-                            "distribution/mpi_wrapper.cc"
-                            "distribution/nccl_wrapper.cc")
-
-
-    if(ENABLE_MPI)
-        include(ExternalProject)
-        # gpu_collective
-        set_property(SOURCE ${GPU_COLLECTIVE_SRCS}
-            PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
-        add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS})
-        target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
-        target_link_libraries(_ms_mpi PRIVATE gpu_collective)
-    endif()
 endif()

 set_property(SOURCE ${DEVICE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
--- a/mindspore/ccsrc/plugin/device/gpu/hal/device/mpi/mpi_initializer.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/mpi/mpi_initializer.cc
@ -1,44 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "plugin/device/gpu/hal/device/mpi/mpi_initializer.h"
-#include <dlfcn.h>
-#include <mpi.h>
-#include <pybind11/operators.h>
-#include <iostream>
-#include <vector>
-#include <string>
-
-namespace mindspore {
-namespace device {
-namespace gpu {
-MPIInitializer &MPIInitializer::GetInstance() {
-  static MPIInitializer instance;
-  return instance;
-}
-
-int MPIInitializer::get_rank_id(const std::string &group) { return GetRankIDByGroup(group); }
-
-int MPIInitializer::get_rank_size(const std::string &group) { return GetGroupSize(group); }
-
-PYBIND11_MODULE(_ms_mpi, mpi_initializer) {
-  mpi_initializer.doc() = "mindspore mpi python wrapper";
-  mpi_initializer.def("get_rank_id", &MPIInitializer::get_rank_id, "get rank id");
-  mpi_initializer.def("get_rank_size", &MPIInitializer::get_rank_size, "get rank size");
-}
-}  // namespace gpu
-}  // namespace device
-}  // namespace mindspore
--- a/mindspore/ccsrc/plugin/device/gpu/hal/device/mpi/mpi_initializer.h
+++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/mpi/mpi_initializer.h
@ -1,42 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_MPI_MPI_INITIALIZER_H_
-#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_MPI_MPI_INITIALIZER_H_
-
-#include <string>
-#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
-
-namespace mindspore {
-namespace device {
-namespace gpu {
-class MPIInitializer {
- public:
-  MPIInitializer(MPIInitializer const &) = delete;
-  MPIInitializer &operator=(const MPIInitializer &) = delete;
-  static MPIInitializer &GetInstance();
-  static int get_rank_id(const std::string &group);
-  static int get_rank_size(const std::string &groups);
-
- private:
-  MPIInitializer() = default;
-  ~MPIInitializer() = default;
-};
-}  // namespace gpu
-}  // namespace device
-}  // namespace mindspore
-
-#endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_MPI_MPI_INITIALIZER_H_
--- a/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_deprecated_interface.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_deprecated_interface.cc
@ -18,7 +18,6 @@
 #include <cuda.h>
 #include <vector>
 #include <string>
-#include "plugin/device/gpu/hal/device/distribution/collective_init.h"

 namespace mindspore {
 namespace device {
@ -61,18 +60,6 @@ int GPUDeprecatedInterface::GetGPUMultiProcessorCount() {
  }
  return sm_count;
 }
-
-void GPUDeprecatedInterface::GPUInitCollective() { CollectiveInitializer::InitCollective(); }
-
-void GPUDeprecatedInterface::GPUFinalizeCollective() { CollectiveInitializer::FinalizeCollective(); }
-
-uint32_t GPUDeprecatedInterface::GPUGetRankID(const std::string &group_name) {
-  return CollectiveInitializer::GetRankID(group_name);
-}
-
-uint32_t GPUDeprecatedInterface::GPUGetRankSize(const std::string &group_name) {
-  return CollectiveInitializer::GetRankSize(group_name);
-}
 }  // namespace gpu
 }  // namespace device
 }  // namespace mindspore
--- a/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_deprecated_interface.h
+++ b/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_deprecated_interface.h
@ -35,10 +35,6 @@ class GPUDeprecatedInterface : public DeprecatedInterface {
  int GetGPUCapabilityMajor() override;
  int GetGPUCapabilityMinor() override;
  int GetGPUMultiProcessorCount() override;
-  void GPUInitCollective() override;
-  void GPUFinalizeCollective() override;
-  uint32_t GPUGetRankID(const std::string &group_name) override;
-  uint32_t GPUGetRankSize(const std::string &group_name) override;
 };
 }  // namespace gpu
 }  // namespace device
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_gpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_gpu_kernel.cc
@ -20,14 +20,8 @@
 namespace mindspore {
 namespace kernel {
 void NcclGpuKernelMod::SelectCollectiveHandle() {
-  use_mpi_ = false;
-  if (use_mpi_) {
-    collective_handle_ = device::gpu::CollectiveInitializer::instance().collective_handle();
-    MS_EXCEPTION_IF_NULL(collective_handle_);
-  } else {
-    if (!LoadNvidiaCommLib()) {
-      MS_LOG(EXCEPTION) << "Failed to load nivdia communication library.";
-    }
+  if (!LoadNvidiaCommLib()) {
+    MS_LOG(EXCEPTION) << "Failed to load nivdia communication library.";
  }
 }

@ -46,128 +40,65 @@ bool NcclGpuKernelMod::LoadNvidiaCommLib() {

 bool NcclGpuKernelMod::AllReduce(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type,
                                 ncclRedOp_t reduce_op, cudaStream_t stream, const std::string &group_name) {
-  if (use_mpi_) {
-    auto all_reduce_funcptr =
-      reinterpret_cast<kernel::AllReduce>(dlsym(const_cast<void *>(collective_handle_), "AllReduce"));
-    MS_EXCEPTION_IF_NULL(all_reduce_funcptr);
-    CHECK_NCCL_RET_WITH_EXCEPT(
-      kernel_node_, (*all_reduce_funcptr)(input_addr, output_addr, count, data_type, reduce_op, stream, group_name),
-      "ncclAllReduce failed");
-  } else {
-    auto allreduce_func = DlsymFuncObj(AllReduce, nvidia_collective_handle_);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
-                               allreduce_func(input_addr, output_addr, count, data_type, reduce_op, group_name, stream),
-                               "ncclAllReduce failed");
-  }
+  auto allreduce_func = DlsymFuncObj(AllReduce, nvidia_collective_handle_);
+  CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
+                             allreduce_func(input_addr, output_addr, count, data_type, reduce_op, group_name, stream),
+                             "ncclAllReduce failed");
  return true;
 }

 bool NcclGpuKernelMod::AllGather(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type,
                                 cudaStream_t stream, const std::string &group_name) {
-  if (use_mpi_) {
-    auto all_gather_funcptr =
-      reinterpret_cast<kernel::AllGather>(dlsym(const_cast<void *>(collective_handle_), "AllGather"));
-    MS_EXCEPTION_IF_NULL(all_gather_funcptr);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
-                               (*all_gather_funcptr)(input_addr, output_addr, count, data_type, stream, group_name),
-                               "ncclAllGather failed");
-  } else {
-    auto allgather_func = DlsymFuncObj(AllGather, nvidia_collective_handle_);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
-                               allgather_func(input_addr, output_addr, count, data_type, group_name, stream),
-                               "ncclAllGather failed");
-  }
+  auto allgather_func = DlsymFuncObj(AllGather, nvidia_collective_handle_);
+  CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
+                             allgather_func(input_addr, output_addr, count, data_type, group_name, stream),
+                             "ncclAllGather failed");
  return true;
 }

 bool NcclGpuKernelMod::ReduceScatter(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type,
                                     ncclRedOp_t reduce_op, cudaStream_t stream, const std::string &group_name) {
-  if (use_mpi_) {
-    auto reduce_scatter_funcptr =
-      reinterpret_cast<kernel::ReduceScatter>(dlsym(const_cast<void *>(collective_handle_), "ReduceScatter"));
-    MS_EXCEPTION_IF_NULL(reduce_scatter_funcptr);
-    CHECK_NCCL_RET_WITH_EXCEPT(
-      kernel_node_, (*reduce_scatter_funcptr)(input_addr, output_addr, count, data_type, reduce_op, stream, group_name),
-      "ncclReduceScatter failed");
-  } else {
-    auto reducescatter_func = DlsymFuncObj(ReduceScatter, nvidia_collective_handle_);
-    CHECK_NCCL_RET_WITH_EXCEPT(
-      kernel_node_, reducescatter_func(input_addr, output_addr, count, data_type, reduce_op, group_name, stream),
-      "ncclReduceScatter failed");
-  }
+  auto reducescatter_func = DlsymFuncObj(ReduceScatter, nvidia_collective_handle_);
+  CHECK_NCCL_RET_WITH_EXCEPT(
+    kernel_node_, reducescatter_func(input_addr, output_addr, count, data_type, reduce_op, group_name, stream),
+    "ncclReduceScatter failed");
  return true;
 }

 bool NcclGpuKernelMod::Broadcast(const void *input_addr, void *output_addr, size_t count, ncclDataType_t data_type,
                                 int root, cudaStream_t stream, const std::string &group_name) {
-  if (use_mpi_) {
-    auto broadcast_funcptr =
-      reinterpret_cast<kernel::Broadcast>(dlsym(const_cast<void *>(collective_handle_), "Broadcast"));
-    MS_EXCEPTION_IF_NULL(broadcast_funcptr);
-    CHECK_NCCL_RET_WITH_EXCEPT(
-      kernel_node_, (*broadcast_funcptr)(input_addr, output_addr, count, data_type, root, stream, group_name),
-      "ncclBroadcast failed");
-  } else {
-    auto broadcast_func = DlsymFuncObj(Broadcast, nvidia_collective_handle_);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
-                               broadcast_func(input_addr, output_addr, count, data_type, root, group_name, stream),
-                               "ncclBroadcast failed");
-  }
+  auto broadcast_func = DlsymFuncObj(Broadcast, nvidia_collective_handle_);
+  CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_,
+                             broadcast_func(input_addr, output_addr, count, data_type, root, group_name, stream),
+                             "ncclBroadcast failed");
  return true;
 }

 bool NcclGpuKernelMod::Send(const void *send_addr, size_t count, ncclDataType_t data_type, int peer_rank,
                            cudaStream_t stream, const std::string &group_name) {
-  if (use_mpi_) {
-    auto nccl_send_func = reinterpret_cast<kernel::Send>(dlsym(const_cast<void *>(collective_handle_), "Send"));
-    MS_EXCEPTION_IF_NULL(nccl_send_func);
-    CHECK_NCCL_RET_WITH_EXCEPT(
-      kernel_node_, (*nccl_send_func)(send_addr, count, data_type, peer_rank, stream, group_name), "ncclSend failed");
-  } else {
-    auto send_func = DlsymFuncObj(Send, nvidia_collective_handle_);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, send_func(send_addr, count, data_type, peer_rank, group_name, stream),
-                               "ncclSend failed");
-  }
+  auto send_func = DlsymFuncObj(Send, nvidia_collective_handle_);
+  CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, send_func(send_addr, count, data_type, peer_rank, group_name, stream),
+                             "ncclSend failed");
  return true;
 }

 bool NcclGpuKernelMod::Recv(void *recv_addr, size_t count, ncclDataType_t data_type, int peer_rank, cudaStream_t stream,
                            const std::string &group_name) {
-  if (use_mpi_) {
-    auto nccl_recv_func = reinterpret_cast<kernel::Recv>(dlsym(const_cast<void *>(collective_handle_), "Recv"));
-    MS_EXCEPTION_IF_NULL(nccl_recv_func);
-    CHECK_NCCL_RET_WITH_EXCEPT(
-      kernel_node_, (*nccl_recv_func)(recv_addr, count, data_type, peer_rank, stream, group_name), "ncclRecv failed");
-  } else {
-    auto recv_func = DlsymFuncObj(Recv, nvidia_collective_handle_);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, recv_func(recv_addr, count, data_type, peer_rank, group_name, stream),
-                               "ncclRecv failed");
-  }
+  auto recv_func = DlsymFuncObj(Recv, nvidia_collective_handle_);
+  CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, recv_func(recv_addr, count, data_type, peer_rank, group_name, stream),
+                             "ncclRecv failed");
  return true;
 }

 bool NcclGpuKernelMod::GroupStart() {
-  if (use_mpi_) {
-    auto nccl_gstart_func =
-      reinterpret_cast<kernel::GroupStart>(dlsym(const_cast<void *>(collective_handle_), "GroupStart"));
-    MS_EXCEPTION_IF_NULL(nccl_gstart_func);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, (*nccl_gstart_func)(), "ncclGroupStart failed");
-  } else {
-    auto groupstart_func = DlsymFuncObj(GroupStart, nvidia_collective_handle_);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, groupstart_func(), "ncclGroupStart failed");
-  }
+  auto groupstart_func = DlsymFuncObj(GroupStart, nvidia_collective_handle_);
+  CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, groupstart_func(), "ncclGroupStart failed");
  return true;
 }

 bool NcclGpuKernelMod::GroupEnd() {
-  if (use_mpi_) {
-    auto nccl_gend_func = reinterpret_cast<kernel::GroupEnd>(dlsym(const_cast<void *>(collective_handle_), "GroupEnd"));
-    MS_EXCEPTION_IF_NULL(nccl_gend_func);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, (*nccl_gend_func)(), "ncclGroupEnd failed");
-  } else {
-    auto groupend_func = DlsymFuncObj(GroupEnd, nvidia_collective_handle_);
-    CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, groupend_func(), "ncclGroupEnd failed");
-  }
+  auto groupend_func = DlsymFuncObj(GroupEnd, nvidia_collective_handle_);
+  CHECK_NCCL_RET_WITH_EXCEPT(kernel_node_, groupend_func(), "ncclGroupEnd failed");
  return true;
 }
 }  // namespace kernel
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_gpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nccl/nccl_gpu_kernel.h
@ -24,7 +24,6 @@
 #include "plugin/device/gpu/kernel/gpu_kernel.h"
 #include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
 #include "plugin/device/gpu/kernel/kernel_constants.h"
-#include "plugin/device/gpu/hal/device/distribution/collective_init.h"
 #include "plugin/device/gpu/hal/hardware/nvidia_collective_comm_lib.h"
 #include "runtime/collective/collective_comm_lib_loader.h"

--- a/mindspore/ccsrc/runtime/collective/gpu_collective_init.cc
+++ b/mindspore/ccsrc/runtime/collective/gpu_collective_init.cc
@ -1,89 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "runtime/collective/gpu_collective_init.h"
-#include "runtime/hardware/device_context_manager.h"
-#include "utils/log_adapter.h"
-
-namespace mindspore {
-namespace device {
-namespace gpu {
-void GPUCollectiveInitializer::InitCollective() {
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
-  auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-  if (device_target != kGPUDevice) {
-    MS_LOG(EXCEPTION) << "You are trying to call 'init('nccl')', Please check "
-                         "this MindSpore package is GPU version and built with NCCL.";
-  }
-  const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
-    {kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
-  MS_EXCEPTION_IF_NULL(device_context);
-  auto deprecated_ptr = device_context->GetDeprecatedInterface();
-  MS_EXCEPTION_IF_NULL(deprecated_ptr);
-  deprecated_ptr->GPUInitCollective();
-}
-
-void GPUCollectiveInitializer::FinalizeCollective() {
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
-  auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-  if (device_target != kGPUDevice) {
-    MS_LOG(EXCEPTION) << "You are trying to call 'finalize('nccl')', Please check "
-                         "this MindSpore package is GPU version and built with NCCL.";
-  }
-  const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
-    {kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
-  MS_EXCEPTION_IF_NULL(device_context);
-  auto deprecated_ptr = device_context->GetDeprecatedInterface();
-  MS_EXCEPTION_IF_NULL(deprecated_ptr);
-  deprecated_ptr->GPUFinalizeCollective();
-}
-
-uint32_t GPUCollectiveInitializer::GetRankID(const std::string &group_name) {
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
-  auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-  if (device_target != kGPUDevice) {
-    MS_LOG(EXCEPTION) << "You are trying to call 'GetRankID', Please check "
-                         "this MindSpore package is GPU version and built with NCCL.";
-  }
-  const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
-    {kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
-  MS_EXCEPTION_IF_NULL(device_context);
-  auto deprecated_ptr = device_context->GetDeprecatedInterface();
-  MS_EXCEPTION_IF_NULL(deprecated_ptr);
-  return deprecated_ptr->GPUGetRankID(group_name);
-}
-
-uint32_t GPUCollectiveInitializer::GetRankSize(const std::string &group_name) {
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
-  auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-  if (device_target != kGPUDevice) {
-    MS_LOG(EXCEPTION) << "You are trying to call 'GetRankSize', Please check "
-                         "this MindSpore package is GPU version and built with NCCL.";
-  }
-  const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
-    {kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
-  MS_EXCEPTION_IF_NULL(device_context);
-  auto deprecated_ptr = device_context->GetDeprecatedInterface();
-  MS_EXCEPTION_IF_NULL(deprecated_ptr);
-  return deprecated_ptr->GPUGetRankSize(group_name);
-}
-}  // namespace gpu
-}  // namespace device
-}  // namespace mindspore
--- a/mindspore/ccsrc/runtime/collective/gpu_collective_init.h
+++ b/mindspore/ccsrc/runtime/collective/gpu_collective_init.h
@ -1,41 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
-#define MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
-
-#include <string>
-#include "include/backend/visible.h"
-
-namespace mindspore {
-namespace device {
-namespace gpu {
-class BACKEND_EXPORT GPUCollectiveInitializer {
- public:
-  GPUCollectiveInitializer() = default;
-  ~GPUCollectiveInitializer() = default;
-  GPUCollectiveInitializer(GPUCollectiveInitializer const &) = delete;
-  GPUCollectiveInitializer &operator=(const GPUCollectiveInitializer &) = delete;
-  static void InitCollective();
-  static void FinalizeCollective();
-  static uint32_t GetRankID(const std::string &group_name);
-  static uint32_t GetRankSize(const std::string &group_name);
-};
-}  // namespace gpu
-}  // namespace device
-}  // namespace mindspore
-
-#endif  // MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
--- a/mindspore/ccsrc/runtime/hardware/deprecated_interface.h
+++ b/mindspore/ccsrc/runtime/hardware/deprecated_interface.h
@ -61,10 +61,6 @@ class DeprecatedInterface {
  virtual int GetGPUCapabilityMajor() { return -1; }
  virtual int GetGPUCapabilityMinor() { return -1; }
  virtual int GetGPUMultiProcessorCount() { return -1; }
-  virtual void GPUInitCollective() {}
-  virtual void GPUFinalizeCollective() {}
-  virtual uint32_t GPUGetRankID(const std::string &group_name) { return 0; }
-  virtual uint32_t GPUGetRankSize(const std::string &group_name) { return 0; }
 };
 }  // namespace device
 }  // namespace mindspore