forked from mindspore-Ecosystem/mindspore
support host reduce scatter and allgather
This commit is contained in:
parent
5306172fee
commit
af8108c9e1
3
build.sh
3
build.sh
|
@ -86,7 +86,7 @@ checkopts()
|
||||||
ENABLE_DUMPE2E="off"
|
ENABLE_DUMPE2E="off"
|
||||||
ENABLE_DUMP_IR="on"
|
ENABLE_DUMP_IR="on"
|
||||||
COMPILE_MINDDATA="on"
|
COMPILE_MINDDATA="on"
|
||||||
ENABLE_MPI="on"
|
ENABLE_MPI="off"
|
||||||
CUDA_VERSION="9.2"
|
CUDA_VERSION="9.2"
|
||||||
COMPILE_PREDICT="off"
|
COMPILE_PREDICT="off"
|
||||||
USE_GLOG="on"
|
USE_GLOG="on"
|
||||||
|
@ -168,6 +168,7 @@ checkopts()
|
||||||
if [[ "X$OPTARG" == "Xgpu" ]]; then
|
if [[ "X$OPTARG" == "Xgpu" ]]; then
|
||||||
ENABLE_GPU="on"
|
ENABLE_GPU="on"
|
||||||
ENABLE_CPU="on"
|
ENABLE_CPU="on"
|
||||||
|
ENABLE_MPI="on"
|
||||||
elif [[ "X$OPTARG" == "Xd" || "X$OPTARG" == "Xascend" ]]; then
|
elif [[ "X$OPTARG" == "Xd" || "X$OPTARG" == "Xascend" ]]; then
|
||||||
ENABLE_D="on"
|
ENABLE_D="on"
|
||||||
ENABLE_CPU="on"
|
ENABLE_CPU="on"
|
||||||
|
|
|
@ -26,6 +26,9 @@ include_directories(${Python3_INCLUDE_DIRS})
|
||||||
include_directories(${CMAKE_SOURCE_DIR}/third_party)
|
include_directories(${CMAKE_SOURCE_DIR}/third_party)
|
||||||
if (ENABLE_CPU)
|
if (ENABLE_CPU)
|
||||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/mkl_dnn.cmake)
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/mkl_dnn.cmake)
|
||||||
|
if (ENABLE_MPI)
|
||||||
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (ENABLE_GPU)
|
if (ENABLE_GPU)
|
||||||
|
@ -36,7 +39,6 @@ if (ENABLE_GPU)
|
||||||
|
|
||||||
if (ENABLE_MPI)
|
if (ENABLE_MPI)
|
||||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake)
|
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/nccl.cmake)
|
||||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ompi.cmake)
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
|
@ -109,19 +109,20 @@ if (ENABLE_CPU)
|
||||||
)
|
)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (ENABLE_MPI)
|
||||||
|
install(
|
||||||
|
TARGETS _ms_mpi
|
||||||
|
DESTINATION ${INSTALL_BASE_DIR}
|
||||||
|
COMPONENT mindspore
|
||||||
|
)
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (ENABLE_GPU)
|
if (ENABLE_GPU)
|
||||||
if (ENABLE_MPI)
|
|
||||||
install(
|
|
||||||
TARGETS _ms_mpi
|
|
||||||
DESTINATION ${INSTALL_BASE_DIR}
|
|
||||||
COMPONENT mindspore
|
|
||||||
)
|
|
||||||
install(
|
install(
|
||||||
TARGETS gpu_collective
|
TARGETS gpu_collective
|
||||||
DESTINATION ${INSTALL_LIB_DIR}
|
DESTINATION ${INSTALL_LIB_DIR}
|
||||||
COMPONENT mindspore
|
COMPONENT mindspore
|
||||||
)
|
)
|
||||||
endif ()
|
|
||||||
install(
|
install(
|
||||||
TARGETS gpu_queue
|
TARGETS gpu_queue
|
||||||
DESTINATION ${INSTALL_LIB_DIR}
|
DESTINATION ${INSTALL_LIB_DIR}
|
||||||
|
|
|
@ -8,6 +8,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||||
add_compile_definitions(BUILDING_DLL)
|
add_compile_definitions(BUILDING_DLL)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (ENABLE_MPI)
|
||||||
|
add_compile_definitions(ENABLE_MPI)
|
||||||
|
endif ()
|
||||||
|
|
||||||
if(ENABLE_GPU)
|
if(ENABLE_GPU)
|
||||||
find_package(CUDA REQUIRED)
|
find_package(CUDA REQUIRED)
|
||||||
find_package(Threads)
|
find_package(Threads)
|
||||||
|
@ -120,7 +124,11 @@ endforeach ()
|
||||||
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
|
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
|
||||||
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
|
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
|
||||||
target_link_libraries(mindspore proto_input)
|
target_link_libraries(mindspore proto_input)
|
||||||
target_link_libraries(mindspore securec mindspore::flatbuffers)
|
if (ENABLE_CPU AND ENABLE_MPI)
|
||||||
|
target_link_libraries(mindspore securec mindspore::flatbuffers mindspore::ompi)
|
||||||
|
else ()
|
||||||
|
target_link_libraries(mindspore securec mindspore::flatbuffers)
|
||||||
|
endif ()
|
||||||
if (NOT WIN32)
|
if (NOT WIN32)
|
||||||
target_link_libraries(mindspore dl)
|
target_link_libraries(mindspore dl)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -14,6 +14,15 @@ endif ()
|
||||||
|
|
||||||
if (ENABLE_CPU)
|
if (ENABLE_CPU)
|
||||||
file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc")
|
file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc")
|
||||||
|
if (ENABLE_MPI)
|
||||||
|
# _ms_mpi
|
||||||
|
set_property(SOURCE "gpu/mpi/mpi_initializer.cc"
|
||||||
|
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||||
|
pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc")
|
||||||
|
target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
|
||||||
|
else ()
|
||||||
|
list(REMOVE_ITEM CPU_SRC_LIST "cpu/mpi/mpi_adapter.cc")
|
||||||
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
# gpu
|
# gpu
|
||||||
|
@ -39,11 +48,6 @@ if (ENABLE_GPU)
|
||||||
set_property(SOURCE ${GPU_COLLECTIVE_SRCS}
|
set_property(SOURCE ${GPU_COLLECTIVE_SRCS}
|
||||||
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||||
add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS})
|
add_library(gpu_collective SHARED ${GPU_COLLECTIVE_SRCS})
|
||||||
# _ms_mpi
|
|
||||||
set_property(SOURCE "gpu/mpi/mpi_initializer.cc"
|
|
||||||
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
|
||||||
pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc")
|
|
||||||
target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
|
|
||||||
target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
|
target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "device/ascend/ascend_kernel_runtime.h"
|
#include "device/ascend/ascend_kernel_runtime.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
@ -24,6 +23,7 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#include "device/ascend/ascend_device_address.h"
|
#include "device/ascend/ascend_device_address.h"
|
||||||
|
#include "device/cpu/mpi/mpi_adapter.h"
|
||||||
#include "utils/context/ms_context.h"
|
#include "utils/context/ms_context.h"
|
||||||
#include "device/ascend/profiling/profiling_manager.h"
|
#include "device/ascend/profiling/profiling_manager.h"
|
||||||
#include "hccl/hcom.h"
|
#include "hccl/hcom.h"
|
||||||
|
@ -510,11 +510,19 @@ bool AscendKernelRuntime::HcclInit() {
|
||||||
MS_LOG(ERROR) << "file path " << config_path_str << " does not exist";
|
MS_LOG(ERROR) << "file path " << config_path_str << " does not exist";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
#ifdef ENABLE_MPI
|
||||||
|
int rank_id = device::cpu::MPIAdapter::Instance().GetRankId();
|
||||||
|
const char *offset = std::getenv("RANK_OFFSET");
|
||||||
|
if (offset != nullptr) {
|
||||||
|
int rank_offset = std::stoi(offset);
|
||||||
|
rank_id += rank_offset;
|
||||||
|
}
|
||||||
|
const char *identify = reinterpret_cast<const char *>(std::to_string(rank_id).c_str());
|
||||||
|
#else
|
||||||
const char *identify = std::getenv("RANK_ID");
|
const char *identify = std::getenv("RANK_ID");
|
||||||
|
#endif
|
||||||
if (identify == nullptr) {
|
if (identify == nullptr) {
|
||||||
MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID";
|
MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID";
|
||||||
free(full_path);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify;
|
MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify;
|
||||||
|
|
|
@ -0,0 +1,191 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "device/cpu/mpi/mpi_adapter.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include "utils/log_adapter.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace device {
|
||||||
|
namespace cpu {
|
||||||
|
namespace {
|
||||||
|
MPI_Op GetMpiOp(const std::string &op_type) {
|
||||||
|
if (op_type == "sum") {
|
||||||
|
return MPI_SUM;
|
||||||
|
} else if (op_type == "max") {
|
||||||
|
return MPI_MAX;
|
||||||
|
} else if (op_type == "min") {
|
||||||
|
return MPI_MIN;
|
||||||
|
} else if (op_type == "prod") {
|
||||||
|
return MPI_PROD;
|
||||||
|
}
|
||||||
|
MS_LOG(EXCEPTION) << "unsupport op_type:" << op_type;
|
||||||
|
return MPI_SUM;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
MPIAdapter::MPIAdapter() : rank_id_(0), rank_size_(0), comm_group_world_(MPI_GROUP_NULL) { Init(); }
|
||||||
|
|
||||||
|
MPIAdapter::~MPIAdapter() {
|
||||||
|
for (auto iter = ranks_group_.begin(); iter != ranks_group_.end(); ++iter) {
|
||||||
|
MPI_Group_free(&iter->second);
|
||||||
|
}
|
||||||
|
if (comm_group_world_ != MPI_GROUP_NULL) {
|
||||||
|
MPI_Group_free(&comm_group_world_);
|
||||||
|
}
|
||||||
|
int finalized;
|
||||||
|
MPI_Finalized(&finalized);
|
||||||
|
if (finalized == 0) {
|
||||||
|
MPI_Finalize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MPIAdapter &MPIAdapter::Instance() {
|
||||||
|
static MPIAdapter instance;
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
int MPIAdapter::GetRankId() const { return rank_id_; }
|
||||||
|
|
||||||
|
void MPIAdapter::Init() {
|
||||||
|
static bool init = false;
|
||||||
|
if (init) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int init_flag = 0;
|
||||||
|
if (MPI_Initialized(&init_flag) != MPI_SUCCESS) {
|
||||||
|
MS_LOG(EXCEPTION) << "Check mpi initialized fail!";
|
||||||
|
}
|
||||||
|
if (init_flag == 0) {
|
||||||
|
auto ret = MPI_Init(nullptr, nullptr);
|
||||||
|
if (ret != MPI_SUCCESS) {
|
||||||
|
MS_LOG(EXCEPTION) << "Failed to init mpi!";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Comm_group(MPI_COMM_WORLD, &comm_group_world_);
|
||||||
|
if (comm_group_world_ == MPI_GROUP_NULL) {
|
||||||
|
MS_LOG(EXCEPTION) << "comm_group_world_ init fail!";
|
||||||
|
}
|
||||||
|
auto ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_);
|
||||||
|
if (ret != MPI_SUCCESS) {
|
||||||
|
MS_LOG(EXCEPTION) << "Failed to init mpi rank id!";
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = MPI_Comm_size(MPI_COMM_WORLD, &rank_size_);
|
||||||
|
if (ret != MPI_SUCCESS) {
|
||||||
|
MS_LOG(EXCEPTION) << "Failed to init mpi rank size!rankid:" << rank_id_;
|
||||||
|
}
|
||||||
|
init = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Group MPIAdapter::AddGroup(const std::vector<int> &ranks) {
|
||||||
|
if (ranks.size() > static_cast<size_t>(rank_size_) || ranks.empty()) {
|
||||||
|
MS_LOG(EXCEPTION) << "input rank size: " << ranks.size() << ", max rank size: " << rank_size_;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (std::find(ranks.begin(), ranks.end(), rank_id_) == ranks.end()) {
|
||||||
|
MS_LOG(ERROR) << "rankid:" << rank_id_ << " is not in the input group.";
|
||||||
|
return MPI_GROUP_NULL;
|
||||||
|
}
|
||||||
|
std::lock_guard<std::mutex> lock(group_mutex_);
|
||||||
|
auto iter = ranks_group_.find(ranks);
|
||||||
|
if (iter != ranks_group_.end()) {
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
const auto ranks_size = ranks.size();
|
||||||
|
std::vector<int> ranks_input(ranks_size, 0);
|
||||||
|
for (size_t i = 0; i < ranks_size; ++i) {
|
||||||
|
ranks_input[i] = ranks[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Group group = MPI_GROUP_NULL;
|
||||||
|
MPI_Group_incl(comm_group_world_, ranks.size(), ranks_input.data(), &group);
|
||||||
|
if (group == MPI_GROUP_NULL) {
|
||||||
|
MS_LOG(EXCEPTION) << "create mpi group fail!rankid:" << rank_id_;
|
||||||
|
}
|
||||||
|
|
||||||
|
ranks_group_[ranks] = group;
|
||||||
|
MS_LOG(INFO) << "rank:" << rank_id_ << " add group:" << group;
|
||||||
|
return group;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MPIAdapter::ReduceScatter(float *input, float *output, const std::vector<int> &ranks_group, size_t data_num,
|
||||||
|
const std::string &op_type) {
|
||||||
|
if (ranks_group.empty()) {
|
||||||
|
MS_LOG(ERROR) << "input rank group is empty!";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto group = AddGroup(ranks_group);
|
||||||
|
if (group == MPI_GROUP_NULL) {
|
||||||
|
MS_LOG(EXCEPTION) << "Get mpi group fail!rankid:" << rank_id_;
|
||||||
|
}
|
||||||
|
MPI_Comm comm;
|
||||||
|
MPI_Comm_create_group(MPI_COMM_WORLD, group, 0, &comm);
|
||||||
|
if (comm == MPI_COMM_NULL) {
|
||||||
|
MS_LOG(EXCEPTION) << "create mpi comm fail!rankid:" << rank_id_;
|
||||||
|
}
|
||||||
|
std::vector<int> receive_count(ranks_group.size(), 0);
|
||||||
|
for (size_t i = 0; i < ranks_group.size(); ++i) {
|
||||||
|
receive_count[i] = data_num;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto op = GetMpiOp(op_type);
|
||||||
|
auto ret = MPI_Reduce_scatter(input, output, receive_count.data(), MPI_FLOAT, op, comm);
|
||||||
|
bool result = true;
|
||||||
|
if (ret != MPI_SUCCESS) {
|
||||||
|
MS_LOG(ERROR) << "mpi reduce_scatter fail!ret = " << ret << ", rankid:" << rank_id_;
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = MPI_Comm_free(&comm);
|
||||||
|
if (ret != MPI_SUCCESS) {
|
||||||
|
MS_LOG(WARNING) << "mpi comm free fail! ret = " << ret << ", rankid:" << rank_id_;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MPIAdapter::AllGather(float *input, float *output, const std::vector<int> &ranks_group, size_t data_num) {
|
||||||
|
if (ranks_group.empty()) {
|
||||||
|
MS_LOG(ERROR) << "input rank group is empty!";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
auto group = AddGroup(ranks_group);
|
||||||
|
if (group == MPI_GROUP_NULL) {
|
||||||
|
MS_LOG(EXCEPTION) << "Get mpi group fail! rankid:" << rank_id_;
|
||||||
|
}
|
||||||
|
MPI_Comm comm;
|
||||||
|
MPI_Comm_create_group(MPI_COMM_WORLD, group, 0, &comm);
|
||||||
|
if (comm == MPI_COMM_NULL) {
|
||||||
|
MS_LOG(EXCEPTION) << "create mpi comm fail! rankid:" << rank_id_;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ret = MPI_Allgather(input, data_num, MPI_FLOAT, output, data_num, MPI_FLOAT, comm);
|
||||||
|
bool result = true;
|
||||||
|
if (ret != MPI_SUCCESS) {
|
||||||
|
MS_LOG(ERROR) << "mpi allgater fail!ret = " << ret << ", rankid:" << rank_id_;
|
||||||
|
result = false;
|
||||||
|
}
|
||||||
|
ret = MPI_Comm_free(&comm);
|
||||||
|
if (ret != MPI_SUCCESS) {
|
||||||
|
MS_LOG(WARNING) << "mpi comm free fail!ret = " << ret << ",rankid:" << rank_id_;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace device
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,55 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_
|
||||||
|
#define MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_
|
||||||
|
#ifdef ENABLE_MPI
|
||||||
|
#include <mpi.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace device {
|
||||||
|
namespace cpu {
|
||||||
|
constexpr auto kOpTypeSum = "sum";
|
||||||
|
class MPIAdapter {
|
||||||
|
public:
|
||||||
|
~MPIAdapter();
|
||||||
|
static MPIAdapter &Instance();
|
||||||
|
int GetRankId() const;
|
||||||
|
bool ReduceScatter(float *input, float *output, const std::vector<int> &ranks_group, size_t data_num,
|
||||||
|
const std::string &op_type = kOpTypeSum);
|
||||||
|
bool AllGather(float *input, float *output, const std::vector<int> &ranks_group, size_t data_num);
|
||||||
|
|
||||||
|
private:
|
||||||
|
MPIAdapter();
|
||||||
|
void Init();
|
||||||
|
MPI_Group AddGroup(const std::vector<int> &ranks);
|
||||||
|
|
||||||
|
int rank_id_;
|
||||||
|
int rank_size_;
|
||||||
|
MPI_Group comm_group_world_;
|
||||||
|
// key:ranks group, value: mpi group
|
||||||
|
std::map<std::vector<int>, MPI_Group> ranks_group_;
|
||||||
|
std::mutex group_mutex_;
|
||||||
|
};
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace device
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // ENABLE_MPI
|
||||||
|
#endif // MINDSPORE_CCSRC_DEVICE_CPU_MPI_MPI_ADAPTER_H_
|
|
@ -20,7 +20,6 @@
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
namespace device {
|
namespace device {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
|
||||||
class CollectiveFakeInitializer {
|
class CollectiveFakeInitializer {
|
||||||
public:
|
public:
|
||||||
CollectiveFakeInitializer() = default;
|
CollectiveFakeInitializer() = default;
|
||||||
|
|
|
@ -24,10 +24,28 @@ namespace mindspore {
|
||||||
namespace device {
|
namespace device {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
MPIInitializer::MPIInitializer() {
|
MPIInitializer::MPIInitializer() {
|
||||||
|
int init_flag = 0;
|
||||||
|
if (MPI_Initialized(&init_flag) != MPI_SUCCESS) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (init_flag == 0) {
|
||||||
|
auto ret = MPI_Init(nullptr, nullptr);
|
||||||
|
if (ret != MPI_SUCCESS) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_);
|
MPI_Comm_rank(MPI_COMM_WORLD, &rank_id_);
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &rank_size_);
|
MPI_Comm_size(MPI_COMM_WORLD, &rank_size_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MPIInitializer::~MPIInitializer() {
|
||||||
|
int finalized_flag = 0;
|
||||||
|
(void)MPI_Finalized(&finalized_flag);
|
||||||
|
if (finalized_flag == 0) {
|
||||||
|
(void)MPI_Finalize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
MPIInitializer &MPIInitializer::GetInstance() {
|
MPIInitializer &MPIInitializer::GetInstance() {
|
||||||
static MPIInitializer instance;
|
static MPIInitializer instance;
|
||||||
return instance;
|
return instance;
|
||||||
|
|
|
@ -30,7 +30,7 @@ class MPIInitializer {
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MPIInitializer();
|
MPIInitializer();
|
||||||
~MPIInitializer() = default;
|
~MPIInitializer();
|
||||||
|
|
||||||
int rank_id_;
|
int rank_id_;
|
||||||
int rank_size_;
|
int rank_size_;
|
||||||
|
|
|
@ -21,6 +21,11 @@ if (ENABLE_CPU)
|
||||||
file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
"cpu/*.cc"
|
"cpu/*.cc"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (NOT ENABLE_MPI)
|
||||||
|
list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
|
||||||
|
list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
|
||||||
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (ENABLE_GPU)
|
if (ENABLE_GPU)
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "kernel/cpu/allgather_cpu_kernel.h"
|
||||||
|
#include "device/cpu/cpu_device_address.h"
|
||||||
|
#include "device/cpu/mpi/mpi_adapter.h"
|
||||||
|
#include "ir/primitive.h"
|
||||||
|
#include "utils/log_adapter.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
namespace {
|
||||||
|
constexpr auto kRanksGroup = "group";
|
||||||
|
constexpr auto kAllGatherInputNum = 1;
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
AllGatherCPUKernel::AllGatherCPUKernel() : input_data_number_(0) {}
|
||||||
|
|
||||||
|
void AllGatherCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
||||||
|
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||||
|
if (input_num != kAllGatherInputNum) {
|
||||||
|
MS_LOG(EXCEPTION) << "allgather input num:" << input_num;
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < input_num; ++i) {
|
||||||
|
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
|
||||||
|
size_t count = 1;
|
||||||
|
for (size_t j = 0; j < shape.size(); j++) {
|
||||||
|
count *= IntToSize(shape[j]);
|
||||||
|
}
|
||||||
|
input_data_number_ += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup);
|
||||||
|
if (ranks_group != nullptr) {
|
||||||
|
ranks_group_ = GetValue<std::vector<int>>(ranks_group);
|
||||||
|
} else {
|
||||||
|
MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AllGatherCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||||
|
const std::vector<kernel::AddressPtr> & /*workspace*/,
|
||||||
|
const std::vector<kernel::AddressPtr> &outputs) {
|
||||||
|
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
|
||||||
|
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
|
||||||
|
|
||||||
|
return device::cpu::MPIAdapter::Instance().AllGather(input_addr, output_addr, ranks_group_, input_data_number_);
|
||||||
|
}
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,45 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
|
||||||
|
#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
#include "kernel/cpu/cpu_kernel.h"
|
||||||
|
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
class AllGatherCPUKernel : public CPUKernel {
|
||||||
|
public:
|
||||||
|
AllGatherCPUKernel();
|
||||||
|
~AllGatherCPUKernel() override = default;
|
||||||
|
|
||||||
|
void InitKernel(const CNodePtr &kernel_node) override;
|
||||||
|
|
||||||
|
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||||
|
const std::vector<AddressPtr> &outputs) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t input_data_number_;
|
||||||
|
std::vector<int> ranks_group_;
|
||||||
|
};
|
||||||
|
|
||||||
|
MS_REG_CPU_KERNEL(HostAllGather, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||||
|
AllGatherCPUKernel);
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
|
|
@ -0,0 +1,62 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "kernel/cpu/reduce_scatter_cpu_kernel.h"
|
||||||
|
#include "device/cpu/cpu_device_address.h"
|
||||||
|
#include "device/cpu/mpi/mpi_adapter.h"
|
||||||
|
#include "ir/primitive.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
namespace {
|
||||||
|
constexpr auto kRanksGroup = "group";
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
ReduceScatterCPUKernel::ReduceScatterCPUKernel() : output_data_number_(0), op_type_(device::cpu::kOpTypeSum) {}
|
||||||
|
|
||||||
|
void ReduceScatterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
||||||
|
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||||
|
for (size_t i = 0; i < output_num; ++i) {
|
||||||
|
auto shape = AnfAlgo::GetOutputInferShape(kernel_node, i);
|
||||||
|
size_t size = 1;
|
||||||
|
for (size_t j = 0; j < shape.size(); j++) {
|
||||||
|
size *= IntToSize(shape[j]);
|
||||||
|
}
|
||||||
|
output_data_number_ += size;
|
||||||
|
}
|
||||||
|
auto op = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("op");
|
||||||
|
if (op != nullptr) {
|
||||||
|
op_type_ = GetValue<std::string>(op);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ranks_group = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(kRanksGroup);
|
||||||
|
if (ranks_group != nullptr) {
|
||||||
|
ranks_group_ = GetValue<std::vector<int>>(ranks_group);
|
||||||
|
} else {
|
||||||
|
MS_LOG(EXCEPTION) << "Miss attribute " << kRanksGroup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ReduceScatterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||||
|
const std::vector<kernel::AddressPtr> & /*workspace*/,
|
||||||
|
const std::vector<kernel::AddressPtr> &outputs) {
|
||||||
|
auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
|
||||||
|
auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
|
||||||
|
|
||||||
|
return device::cpu::MPIAdapter::Instance().ReduceScatter(input_addr, output_addr, ranks_group_, output_data_number_,
|
||||||
|
op_type_);
|
||||||
|
}
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,46 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
|
||||||
|
#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include "kernel/cpu/cpu_kernel.h"
|
||||||
|
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
class ReduceScatterCPUKernel : public CPUKernel {
|
||||||
|
public:
|
||||||
|
ReduceScatterCPUKernel();
|
||||||
|
~ReduceScatterCPUKernel() override = default;
|
||||||
|
|
||||||
|
void InitKernel(const CNodePtr &kernel_node) override;
|
||||||
|
|
||||||
|
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||||
|
const std::vector<AddressPtr> &outputs) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t output_data_number_;
|
||||||
|
std::string op_type_;
|
||||||
|
std::vector<int> ranks_group_;
|
||||||
|
};
|
||||||
|
|
||||||
|
MS_REG_CPU_KERNEL(HostReduceScatter, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||||
|
ReduceScatterCPUKernel);
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
|
|
@ -0,0 +1,70 @@
|
||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import mindspore.context as context
|
||||||
|
import mindspore.nn as nn
|
||||||
|
from mindspore import Tensor
|
||||||
|
from mindspore.common import dtype as mstype
|
||||||
|
from mindspore.ops import operations as P
|
||||||
|
import mindspore._ms_mpi as mpi
|
||||||
|
# run comand:
|
||||||
|
# mpirun -np 3 python test_reduce_scatter.py
|
||||||
|
|
||||||
|
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||||
|
|
||||||
|
class Net(nn.Cell):
|
||||||
|
def __init__(self):
|
||||||
|
super(Net, self).__init__()
|
||||||
|
self.op = "sum"
|
||||||
|
|
||||||
|
self.reducescatter = P.HostReduceScatter(op=self.op, group=[0,1,2])
|
||||||
|
|
||||||
|
def construct(self, x):
|
||||||
|
return self.reducescatter(x)
|
||||||
|
|
||||||
|
class AllGatherNet(nn.Cell):
|
||||||
|
def __init__(self):
|
||||||
|
super(AllGatherNet, self).__init__()
|
||||||
|
self.hostallgather = P.HostAllGather(group=(0, 1, 2))
|
||||||
|
|
||||||
|
def construct(self, x):
|
||||||
|
return self.hostallgather(x)
|
||||||
|
|
||||||
|
def test_net_reduce_scatter():
|
||||||
|
x = np.ones(12).astype(np.float32) * 0.1
|
||||||
|
|
||||||
|
reducescatter = Net()
|
||||||
|
rankid = mpi.get_rank_id()
|
||||||
|
print("self rankid:", rankid)
|
||||||
|
output = reducescatter(Tensor(x, mstype.float32))
|
||||||
|
print("output:\n", output)
|
||||||
|
expect_result = np.ones(4).astype(np.float32) * 0.3
|
||||||
|
diff = abs(output.asnumpy() - expect_result)
|
||||||
|
error = np.ones(shape=expect_result.shape) * 1.0e-6
|
||||||
|
assert np.all(diff < error)
|
||||||
|
|
||||||
|
allgather = AllGatherNet()
|
||||||
|
allgather_output = allgather(output)
|
||||||
|
print("allgather result:\n", allgather_output)
|
||||||
|
expect_allgather_result = np.ones(12).astype(np.float32) * 0.3
|
||||||
|
diff = abs(allgather_output.asnumpy() - expect_allgather_result)
|
||||||
|
error = np.ones(shape=expect_allgather_result.shape) * 1.0e-6
|
||||||
|
assert np.all(diff < error)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
test_net_reduce_scatter()
|
Loading…
Reference in New Issue