plugin dynamic load

This commit is contained in:
lby 2022-08-27 10:00:03 +08:00
parent 2d0cb07eed
commit 090b15bed0
23 changed files with 338 additions and 112 deletions

View File

@ -81,6 +81,7 @@ if [[ "X$COMPILE_LITE" = "Xon" ]]; then
source mindspore/lite/build_lite.sh
else
mkdir -pv "${BUILD_PATH}/package/mindspore/lib"
mkdir -pv "${BUILD_PATH}/package/mindspore/lib/plugin"
update_submodule
build_mindspore

View File

@ -68,6 +68,7 @@ set(INSTALL_BASE_DIR ".")
set(INSTALL_BIN_DIR "bin")
set(INSTALL_CFG_DIR "config")
set(INSTALL_LIB_DIR "lib")
set(INSTALL_PLUGIN_DIR "${INSTALL_LIB_DIR}/plugin")
# set package files
install(
TARGETS _c_expression
@ -98,7 +99,7 @@ install(
if(ENABLE_D)
install(
TARGETS mindspore_ascend
DESTINATION ${INSTALL_LIB_DIR}
DESTINATION ${INSTALL_PLUGIN_DIR}
COMPONENT mindspore
)
endif()
@ -106,7 +107,7 @@ endif()
if(ENABLE_GPU)
install(
TARGETS mindspore_gpu
DESTINATION ${INSTALL_LIB_DIR}
DESTINATION ${INSTALL_PLUGIN_DIR}
COMPONENT mindspore
)
endif()

View File

@ -18,6 +18,7 @@ set(INSTALL_BASE_DIR ".")
set(INSTALL_BIN_DIR "bin")
set(INSTALL_CFG_DIR "config")
set(INSTALL_LIB_DIR "lib")
set(INSTALL_PLUGIN_DIR "${INSTALL_LIB_DIR}/plugin")
# set package files
install(
@ -35,7 +36,7 @@ install(
if(ENABLE_D)
install(
TARGETS mindspore_ascend
DESTINATION ${INSTALL_LIB_DIR}
DESTINATION ${INSTALL_PLUGIN_DIR}
COMPONENT mindspore
)
endif()
@ -43,7 +44,7 @@ endif()
if(ENABLE_GPU)
install(
TARGETS mindspore_gpu
DESTINATION ${INSTALL_LIB_DIR}
DESTINATION ${INSTALL_PLUGIN_DIR}
COMPONENT mindspore
)
endif()

View File

@ -479,18 +479,16 @@ elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
else()
MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}")
endif()
set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${MINDSPORE_RPATH})
set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${ORIGIN_PATH}/lib/plugin:${ORIGIN_PATH}/..:${MINDSPORE_RPATH})
if(ENABLE_D)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/ascend)
add_subdirectory(plugin/device/ascend)
target_link_libraries(mindspore_backend PRIVATE mindspore_ascend)
endif()
if(ENABLE_GPU)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/gpu)
add_subdirectory(plugin/device/gpu)
target_link_libraries(mindspore_backend PRIVATE mindspore_gpu)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "Windows")

View File

@ -139,7 +139,7 @@ class BACKEND_EXPORT OpAdaptationInfoRegister {
static std::string GenerateKey(const std::string &op_name, const std::string &device_name, bool flag);
// key: (op_name + device_name + flag), value: <OpAdaptationInfo *>
std::map<std::string, OpAdaptationInfo *> op_info_map_;
inline static std::map<std::string, OpAdaptationInfo *> op_info_map_;
};
class RegisterHelper {

View File

@ -22,9 +22,10 @@
#include "utils/hash_map.h"
#include "common/graph_kernel/expanders/utils.h"
#include "include/common/visible.h"
namespace mindspore::graphkernel::expanders {
class OpDescFactory {
class COMMON_EXPORT OpDescFactory {
public:
static OpDescFactory &Instance() {
static OpDescFactory instance = OpDescFactory();
@ -46,7 +47,7 @@ class OpDescFactory {
void Register(const std::string &op, const RegFunc &func) { creators[op] = func; }
private:
mindspore::HashMap<std::string, RegFunc> creators;
inline static mindspore::HashMap<std::string, RegFunc> creators;
};
class OpDescRegister {

View File

@ -62,13 +62,7 @@ if(ENABLE_D)
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/pattern_engine.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/helper.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/node_pass.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/visit.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/kernel/kernel_build_info.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/kernel_info.cc")
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc")
endif()
if(NOT ENABLE_TESTCASES AND NOT BUILD_LITE)
@ -76,9 +70,15 @@ if(NOT ENABLE_TESTCASES AND NOT BUILD_LITE)
set(MSLIB_SRC ${MSLIB_SRC} ${CMAKE_SOURCE_DIR}/mindspore/core/utils/status.cc)
endif()
if(ENABLE_D OR ENABLE_ACL)
if((ENABLE_D OR ENABLE_ACL) AND NOT BUILD_LITE)
list(APPEND MSLIB_SRC
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc")
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/pattern_engine.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/helper.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/node_pass.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/visit.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/kernel/kernel_build_info.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/kernel_info.cc")
endif()
if(ENABLE_GPU)
@ -90,8 +90,6 @@ if(BUILD_LITE)
file(GLOB_RECURSE ACL_REMOVE_SRC ${CMAKE_CURRENT_SOURCE_DIR}
"model/acl/acl_vm/*.cc"
)
list(REMOVE_ITEM MSLIB_SRC
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc")
list(REMOVE_ITEM MSLIB_SRC "${CMAKE_CURRENT_SOURCE_DIR}/akg_kernel_register.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/model/acl/acl_model_multi.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/model/acl/acl_model.cc"
@ -104,7 +102,7 @@ endif()
add_library(mindspore_shared_lib_obj OBJECT ${MSLIB_SRC})
add_library(mindspore_shared_lib SHARED $<TARGET_OBJECTS:mindspore_shared_lib_obj>)
if(BUILD_LITE)
if(BUILD_LITE OR ENABLE_ACL)
target_link_libraries(mindspore_shared_lib PRIVATE $<TARGET_OBJECTS:_mindspore_transform_graph_ir_obj>)
add_dependencies(mindspore_shared_lib _mindspore_transform_graph_ir_obj)
elseif(MODE_ASCEND_ACL)
@ -158,10 +156,11 @@ if(ENABLE_D OR ENABLE_ACL)
find_library(libaicore_utils libaicore_utils.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(libaicpu_engine_common libaicpu_engine_common.so ${ASCEND_CANN_RUNTIME_PATH}
${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(GE_RUNNER ge_runner ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
target_link_libraries(mindspore_shared_lib PRIVATE -Wl,--no-as-needed graph ${ge_compiler}
${acl_retr} ${acl_cblas} ${acl_dvpp} ${acl_runtime} ${libplatform} ${libcompress} ${libopskernel}
${libaicore_utils} ${libaicpu_engine_common} ${acl})
${libaicore_utils} ${libaicpu_engine_common} ${acl} ${GE_RUNNER})
add_dependencies(mindspore_shared_lib_obj graph)
add_dependencies(mindspore_shared_lib graph)
endif()

View File

@ -133,9 +133,9 @@ class BACKEND_EXPORT DataQueueMgr {
bool dynamic_shape_{false};
size_t default_capacity_{2};
std::map<std::string, std::shared_ptr<BlockingQueue>> name_queue_map_;
std::map<std::string, DataQueueCreator> data_queue_creator_map_ = {}; // key: device name, value: DataQueueCreator
inline static std::map<std::string, std::shared_ptr<BlockingQueue>> name_queue_map_;
// key: device name, value: DataQueueCreator
inline static std::map<std::string, DataQueueCreator> data_queue_creator_map_ = {};
HANDLER_DEFINE(bool, DestoryTdtHandle);
};

View File

@ -32,11 +32,7 @@
#include "include/common/utils/parallel_context.h"
#include "frontend/parallel/costmodel_context.h"
#include "frontend/optimizer/ad/dfunctor.h"
#ifdef ENABLE_GPU_COLLECTIVE
#include "plugin/device/gpu/hal/device/distribution/collective_init.h"
#else
#include "runtime/collective/collective_fake_init.h"
#endif
#include "runtime/collective/gpu_collective_init.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#include "ps/util.h"
#endif
@ -404,25 +400,15 @@ PYBIND11_MODULE(_c_expression, m) {
(void)py::class_<OpLib, std::shared_ptr<OpLib>>(m, "Oplib")
.def(py::init())
.def_static("reg_op", &OpLib::RegOp, "Register op info.");
#ifdef ENABLE_GPU_COLLECTIVE
(void)m.def("init_gpu_collective", &mindspore::device::gpu::CollectiveInitializer::InitCollective,
(void)m.def("init_gpu_collective", &mindspore::device::gpu::GPUCollectiveInitializer::InitCollective,
"Init gpu collective communication mode.");
(void)m.def("finalize_gpu_collective", &mindspore::device::gpu::CollectiveInitializer::FinalizeCollective,
(void)m.def("finalize_gpu_collective", &mindspore::device::gpu::GPUCollectiveInitializer::FinalizeCollective,
"Finalize gpu collective communication mode.");
(void)m.def("get_rank_id", &mindspore::device::gpu::CollectiveInitializer::GetRankID,
(void)m.def("get_rank_id", &mindspore::device::gpu::GPUCollectiveInitializer::GetRankID,
"Finalize gpu collective communication mode.");
(void)m.def("get_rank_size", &mindspore::device::gpu::CollectiveInitializer::GetRankSize,
(void)m.def("get_rank_size", &mindspore::device::gpu::GPUCollectiveInitializer::GetRankSize,
"Finalize gpu collective communication mode.");
#else
(void)m.def("init_gpu_collective", &mindspore::device::gpu::CollectiveFakeInitializer::InitCollective,
"Init gpu collective communication mode.");
(void)m.def("finalize_gpu_collective", &mindspore::device::gpu::CollectiveFakeInitializer::FinalizeCollective,
"Finalize gpu collective communication mode.");
(void)m.def("get_rank_id", &mindspore::device::gpu::CollectiveFakeInitializer::GetRankID,
"Finalize gpu collective communication mode.");
(void)m.def("get_rank_size", &mindspore::device::gpu::CollectiveFakeInitializer::GetRankSize,
"Finalize gpu collective communication mode.");
#endif
(void)py::class_<CollectiveManager, std::shared_ptr<CollectiveManager>>(m, "CollectiveManager")
.def_static("get_instance", &CollectiveManager::instance, "Get collective manager instance.")

View File

@ -1605,6 +1605,7 @@ void FinalizeHccl() {
session::ExecutorManager::Instance().Clear();
device::DeviceContextManager::GetInstance().ClearDeviceContexts();
device::KernelRuntimeManager::Instance().ClearRuntimeResource();
device::DeviceContextManager::GetInstance().UnloadPlugin();
}
uint32_t GetHcclRankId() {
@ -1731,8 +1732,7 @@ void MemoryRecycle() {
FuncGraphLoopBreaker::Inst().BreakLoop();
}
void ClearResAtexit() {
MS_LOG(INFO) << "Pipeline clear all resource";
void ClearResPart1() {
runtime::OpExecutor::GetInstance().WorkerJoin();
// When the python process exits, the kernels on the device may not have finished executing.
device::KernelRuntimeManager::Instance().WaitTaskFinishOnDevice();
@ -1765,7 +1765,6 @@ void ClearResAtexit() {
MS_LOG(INFO) << "End Finalize StreamSynchronizer...";
(void)distributed::collective::CollectiveManager::instance()->Finalize();
PrimitivePy::ClearHookRes();
ad::g_k_prims.clear();
ad::ClearKPynativeCellStaticRes();
@ -1776,7 +1775,9 @@ void ClearResAtexit() {
pipeline::GetAttrMap().clear();
pipeline::GraphExecutorPy::ClearRes();
pipeline::ReclaimOptimizer();
}
void ClearResPart2() {
MS_LOG(INFO) << "Start clear PyNativeExecutor...";
pynative::PyNativeExecutor::GetInstance()->ClearRes();
MS_LOG(INFO) << "End clear PyNativeExecutor.";
@ -1804,7 +1805,6 @@ void ClearResAtexit() {
ConfigManager::GetInstance().ResetIterNum();
MS_LOG(INFO) << "End clear ConfigManager.";
#endif
MS_LOG(INFO) << "Start clear device context...";
device::DeviceContextManager::GetInstance().ClearDeviceContexts();
MS_LOG(INFO) << "End clear device context.";
@ -1829,6 +1829,9 @@ void ClearResAtexit() {
Debugger::GetInstance()->Reset();
#endif
g_args_cache.clear();
}
void ClearResPart3() {
// clean static variable to prevent from crash. As static variable is released after
// Python threads is released.
MS_LOG(INFO) << "Start clear ClearObjectCache...";
@ -1854,6 +1857,17 @@ void ClearResAtexit() {
MS_LOG(INFO) << "Start clear ProtobufLibrary...";
google::protobuf::ShutdownProtobufLibrary();
MS_LOG(INFO) << "End clear ProtobufLibrary...";
MS_LOG(INFO) << "Start unload dynamic lib...";
device::DeviceContextManager::GetInstance().UnloadPlugin();
MS_LOG(INFO) << "End unload dynamic lib...";
}
// Atexit hook: releases all pipeline resources in three ordered phases.
// Part1 stops executors/devices, Part2 clears executor/context caches, and
// Part3 tears down static state, finishing with the device-plugin unload —
// presumably last so nothing still references plugin symbols (see ClearResPart3).
void ClearResAtexit() {
MS_LOG(INFO) << "Pipeline clear all resource";
ClearResPart1();
ClearResPart2();
ClearResPart3();
}
py::bytes PyEncrypt(char *plain_data, size_t plain_len, char *key, size_t key_len, const std::string &enc_mode) {

View File

@ -30,6 +30,7 @@
namespace mindspore {
namespace kernel {
constexpr size_t kPluginSuffix = 6;
bool AicpuOpKernelLoad::GetBinaryFileName(const std::string &so_name, const std::string &bin_folder_path,
std::string *bin_file_path) {
MS_EXCEPTION_IF_NULL(bin_file_path);
@ -105,7 +106,7 @@ bool AicpuOpKernelLoad::GetSoNeedLoadPath(std::string *file_path) const {
MS_LOG(ERROR) << "Current path [" << cust_kernel_so_path << "] is invalid.";
return false;
}
auto real_cust_kernel_so_path = cust_kernel_so_path.substr(0, pos) + "/";
auto real_cust_kernel_so_path = cust_kernel_so_path.substr(0, pos - kPluginSuffix);
if (real_cust_kernel_so_path.size() > PATH_MAX) {
MS_LOG(ERROR) << "Current path [" << real_cust_kernel_so_path << "] is too long.";
return false;

View File

@ -27,7 +27,7 @@ add_library(mindspore_gpu SHARED ${GPU_SUB_OBJECTS_SRC})
target_link_libraries(mindspore_gpu PUBLIC mindspore_backend_common)
target_link_libraries(mindspore_gpu PRIVATE mindspore_core mindspore_common proto_input mindspore::protobuf)
target_link_libraries(mindspore_gpu PRIVATE securec)
set_target_properties(mindspore_gpu PROPERTIES INSTALL_RPATH $ORIGIN)
set_target_properties(mindspore_gpu PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
target_link_libraries(mindspore_gpu PRIVATE mindspore::dnnl mindspore::mkldnn nnacl)
target_link_libraries(mindspore_gpu PRIVATE mindspore::ssl mindspore::crypto)
target_link_libraries(mindspore_gpu PRIVATE mindspore::event mindspore::event_pthreads

View File

@ -18,6 +18,7 @@
#include <cuda.h>
#include <vector>
#include <string>
#include "plugin/device/gpu/hal/device/distribution/collective_init.h"
namespace mindspore {
namespace device {
@ -60,6 +61,18 @@ int GPUDeprecatedInterface::GetGPUMultiProcessorCount() {
}
return sm_count;
}
// Thin plugin-side adapters: forward the deprecated GPU collective entry
// points to the in-plugin CollectiveInitializer implementation.
void GPUDeprecatedInterface::GPUInitCollective() { CollectiveInitializer::InitCollective(); }
void GPUDeprecatedInterface::GPUFinalizeCollective() { CollectiveInitializer::FinalizeCollective(); }
// Rank id of this process within 'group_name'.
uint32_t GPUDeprecatedInterface::GPUGetRankID(const std::string &group_name) {
return CollectiveInitializer::GetRankID(group_name);
}
// Number of ranks in 'group_name'.
uint32_t GPUDeprecatedInterface::GPUGetRankSize(const std::string &group_name) {
return CollectiveInitializer::GetRankSize(group_name);
}
} // namespace gpu
} // namespace device
} // namespace mindspore

View File

@ -35,6 +35,10 @@ class GPUDeprecatedInterface : public DeprecatedInterface {
int GetGPUCapabilityMajor() override;
int GetGPUCapabilityMinor() override;
int GetGPUMultiProcessorCount() override;
void GPUInitCollective() override;
void GPUFinalizeCollective() override;
uint32_t GPUGetRankID(const std::string &group_name) override;
uint32_t GPUGetRankSize(const std::string &group_name) override;
};
} // namespace gpu
} // namespace device

View File

@ -57,7 +57,7 @@ struct OpInfo {
uint32_t pid;
};
class ProfilerManager {
class BACKEND_EXPORT ProfilerManager {
public:
static std::shared_ptr<ProfilerManager> &GetInstance();
ProfilerManager() = default;
@ -75,7 +75,7 @@ class ProfilerManager {
bool is_dynamic_shape_net_ = 0;
};
class Profiler {
class BACKEND_EXPORT Profiler {
public:
static std::shared_ptr<Profiler> GetInstance(const std::string &name) noexcept;
static bool Register(const std::string &name, const std::shared_ptr<Profiler> &instance);
@ -125,7 +125,7 @@ class Profiler {
bool is_parallel_strategy = false;
private:
BACKEND_EXPORT inline static std::map<std::string, std::shared_ptr<Profiler>> instance_map_ = {};
inline static std::map<std::string, std::shared_ptr<Profiler>> instance_map_ = {};
};
} // namespace profiler
} // namespace mindspore

View File

@ -1,44 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/collective/collective_fake_init.h"
#include "utils/log_adapter.h"
namespace mindspore {
namespace device {
namespace gpu {
void CollectiveFakeInitializer::InitCollective() {
MS_LOG(EXCEPTION) << "You are trying to call 'init('nccl')', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
void CollectiveFakeInitializer::FinalizeCollective() {
MS_LOG(EXCEPTION) << "You are trying to call 'init('nccl')', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
uint32_t CollectiveFakeInitializer::GetRankID(const std::string &) {
MS_LOG(EXCEPTION) << "You are trying to call 'GetRankID', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
uint32_t CollectiveFakeInitializer::GetRankSize(const std::string &) {
MS_LOG(EXCEPTION) << "You are trying to call 'GetRankSize', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
} // namespace gpu
} // namespace device
} // namespace mindspore

View File

@ -0,0 +1,89 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/collective/gpu_collective_init.h"
#include "runtime/hardware/device_context_manager.h"
#include "utils/log_adapter.h"
namespace mindspore {
namespace device {
namespace gpu {
namespace {
// Shared preamble for every GPUCollectiveInitializer entry point: verifies the
// configured device target is GPU (raising a caller-specific exception
// otherwise) and returns the non-null deprecated GPU interface of the device
// context. 'api_name' is interpolated into the error message only; the
// resulting runtime messages are byte-identical to the previous per-function
// copies.
auto GetGpuDeprecatedInterface(const std::string &api_name) {
  auto ms_context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(ms_context);
  auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
  if (device_target != kGPUDevice) {
    MS_LOG(EXCEPTION) << "You are trying to call '" << api_name
                      << "', Please check this MindSpore package is GPU version and built with NCCL.";
  }
  const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
    {kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
  MS_EXCEPTION_IF_NULL(device_context);
  auto deprecated_ptr = device_context->GetDeprecatedInterface();
  MS_EXCEPTION_IF_NULL(deprecated_ptr);
  return deprecated_ptr;
}
}  // namespace

// Initializes NCCL collective communication through the GPU plugin.
void GPUCollectiveInitializer::InitCollective() { GetGpuDeprecatedInterface("init('nccl')")->GPUInitCollective(); }

// Finalizes NCCL collective communication through the GPU plugin.
void GPUCollectiveInitializer::FinalizeCollective() {
  GetGpuDeprecatedInterface("finalize('nccl')")->GPUFinalizeCollective();
}

// Returns this process's rank id within 'group_name'.
uint32_t GPUCollectiveInitializer::GetRankID(const std::string &group_name) {
  return GetGpuDeprecatedInterface("GetRankID")->GPUGetRankID(group_name);
}

// Returns the number of ranks in 'group_name'.
uint32_t GPUCollectiveInitializer::GetRankSize(const std::string &group_name) {
  return GetGpuDeprecatedInterface("GetRankSize")->GPUGetRankSize(group_name);
}
} // namespace gpu
} // namespace device
} // namespace mindspore

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_DISTRIBUTION_COLLECTIVE_FAKE_INIT_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_DISTRIBUTION_COLLECTIVE_FAKE_INIT_H_
#ifndef MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
#define MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
#include <string>
#include "include/backend/visible.h"
@ -23,12 +23,12 @@
namespace mindspore {
namespace device {
namespace gpu {
class BACKEND_EXPORT CollectiveFakeInitializer {
class BACKEND_EXPORT GPUCollectiveInitializer {
public:
CollectiveFakeInitializer() = default;
~CollectiveFakeInitializer() = default;
CollectiveFakeInitializer(CollectiveFakeInitializer const &) = delete;
CollectiveFakeInitializer &operator=(const CollectiveFakeInitializer &) = delete;
GPUCollectiveInitializer() = default;
~GPUCollectiveInitializer() = default;
GPUCollectiveInitializer(GPUCollectiveInitializer const &) = delete;
GPUCollectiveInitializer &operator=(const GPUCollectiveInitializer &) = delete;
static void InitCollective();
static void FinalizeCollective();
static uint32_t GetRankID(const std::string &group_name);
@ -38,4 +38,4 @@ class BACKEND_EXPORT CollectiveFakeInitializer {
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_DISTRIBUTION_COLLECTIVE_FAKE_INIT_H_
#endif // MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_

View File

@ -49,8 +49,8 @@ class BACKEND_EXPORT KernelRuntimeManager {
~KernelRuntimeManager() = default;
DISABLE_COPY_AND_ASSIGN(KernelRuntimeManager);
std::string GetDeviceKey(const std::string &device_name, uint32_t device_id) const;
std::map<std::string, std::shared_ptr<KernelRuntime> > runtime_map_;
std::map<std::string, KernelRuntimeCreator> runtime_creators_;
inline static std::map<std::string, std::shared_ptr<KernelRuntime> > runtime_map_;
inline static std::map<std::string, KernelRuntimeCreator> runtime_creators_;
std::mutex lock_;
};

View File

@ -58,6 +58,10 @@ class DeprecatedInterface {
virtual int GetGPUCapabilityMajor() { return -1; }
virtual int GetGPUCapabilityMinor() { return -1; }
virtual int GetGPUMultiProcessorCount() { return -1; }
virtual void GPUInitCollective() {}
virtual void GPUFinalizeCollective() {}
virtual uint32_t GPUGetRankID(const std::string &group_name) { return 0; }
virtual uint32_t GPUGetRankSize(const std::string &group_name) { return 0; }
};
} // namespace device
} // namespace mindspore

View File

@ -15,12 +15,117 @@
*/
#include "runtime/hardware/device_context_manager.h"
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#endif
#include <dirent.h>
#include <algorithm>
#include <string>
#include <fstream>
#include "utils/ms_context.h"
#include "utils/dlopen_macro.h"
namespace mindspore {
namespace plugin_loader {
// Attempts to load a single plugin shared library and, on success, records
// its handle in *all_handles keyed by the library's base name (extension
// stripped). Entries whose name does not contain "libmindspore_" are skipped,
// so directory noise ("." / "..", unrelated files) is ignored.
void PluginLoader::LoadDynamicLib(const std::string &plugin_file, std::map<std::string, void *> *all_handles) {
MS_EXCEPTION_IF_NULL(all_handles);
void *handle = nullptr;
std::string err_msg;
if (plugin_file.find("libmindspore_") == std::string::npos) {
return;
}
auto so_name = GetDynamicLibName(plugin_file);
#if defined(_WIN32) || defined(_WIN64)
handle = LoadLibrary(plugin_file.c_str());
err_msg = std::to_string(GetLastError());
#else
// RTLD_LOCAL keeps each plugin's symbols out of the global namespace so
// plugins cannot clash with one another.
handle = dlopen(plugin_file.c_str(), RTLD_NOW | RTLD_LOCAL);
err_msg = GetDlErrorMsg();
#endif
if (handle == nullptr) {
// NOTE(review): a failed plugin load is only visible at DEBUG verbosity and
// is otherwise silently ignored — confirm this best-effort policy is intended.
MS_LOG(DEBUG) << "Load dynamic lib: " << so_name << " failed. " << err_msg;
return;
}
(*all_handles)[so_name] = handle;
}
void PluginLoader::CloseDynamicLib(const std::string &dl_name, void *handle) {
#if defined(_WIN32) || defined(_WIN64)
if (!FreeLibrary(static_cast<HMODULE>(handle))) {
MS_LOG(EXCEPTION) << "Closing dynamic lib: " + dl_name + " handle failed. Error: " + std::to_string(GetLastError());
}
#else
if (dlclose(handle) != 0) {
MS_LOG(EXCEPTION) << "Closing dynamic lib: " << dl_name << "failed, error message: " << GetDlErrorMsg();
}
#endif
}
// Derives the registry key for a plugin from its file name by stripping the
// final extension (everything from the last '.'). A name with no dot is
// considered malformed and mapped to the sentinel "unknown_name".
std::string PluginLoader::GetDynamicLibName(const std::string &plugin_file) {
  const auto dot_pos = plugin_file.rfind('.');
  if (dot_pos != std::string::npos) {
    return plugin_file.substr(0, dot_pos);
  }
  MS_LOG(WARNING) << "Invalid plugin file " << plugin_file;
  return "unknown_name";
}
// Resolves the absolute path of the "plugin" directory that sits next to the
// shared library containing this code. Returns false (with an INFO log) when
// the path cannot be determined or does not exist, in which case the caller
// skips plugin loading entirely.
bool PluginLoader::GetPluginPath(std::string *file_path) {
  MS_EXCEPTION_IF_NULL(file_path);
  std::string cur_so_path;
#if !defined(_WIN32) && !defined(_WIN64)
  Dl_info dl_info;
  // dladdr on one of our own symbols yields the path of the enclosing .so.
  if (dladdr(reinterpret_cast<void *>(PluginLoader::GetPluginPath), &dl_info) == 0) {
    MS_LOG(INFO) << "Get dladdr error";
    return false;
  }
  cur_so_path = dl_info.dli_fname;
#else
  HMODULE hModule = nullptr;
  if (GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT | GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                        (LPCSTR)PluginLoader::GetPluginPath, &hModule) == 0) {
    MS_LOG(INFO) << "Get GetModuleHandleEx failed.";
    return false;
  }
  char szPath[MAX_PATH];
  if (GetModuleFileName(hModule, szPath, sizeof(szPath)) == 0) {
    // Fix: this branch previously logged "Get GetModuleHandleEx failed."
    // (copy-pasted from above), making the two failure modes indistinguishable.
    MS_LOG(INFO) << "Get GetModuleFileName failed.";
    return false;
  }
  cur_so_path = std::string(szPath);
#endif
  // NOTE(review): only '/' is searched even on Windows, where
  // GetModuleFileName returns backslash-separated paths — confirm the Windows
  // branch is exercised, otherwise search "/\\" here.
  auto pos = cur_so_path.find_last_of('/');
  if (cur_so_path.empty() || pos == std::string::npos) {
    MS_LOG(INFO) << "Current so path empty or the path [" << cur_so_path << "] is invalid.";
    return false;
  }
  auto plugin_so_path = cur_so_path.substr(0, pos) + "/plugin";
  if (plugin_so_path.size() >= PATH_MAX) {
    MS_LOG(INFO) << "Current path [" << plugin_so_path << "] is invalid.";
    return false;
  }
  char real_path_mem[PATH_MAX] = {0};
#if defined(_WIN32) || defined(_WIN64)
  if (_fullpath(real_path_mem, common::SafeCStr(plugin_so_path), PATH_MAX) == nullptr) {
    MS_LOG(INFO) << "Plugin path is invalid: [" << plugin_so_path << "], skip!";
    return false;
  }
#else
  // realpath also verifies that the directory actually exists.
  if (realpath(common::SafeCStr(plugin_so_path), real_path_mem) == nullptr) {
    MS_LOG(INFO) << "Plugin path is invalid: [" << plugin_so_path << "], skip!";
    return false;
  }
#endif
  *file_path = std::string(real_path_mem);
  return true;
}
} // namespace plugin_loader
namespace device {
DeviceContextManager &DeviceContextManager::GetInstance() {
static DeviceContextManager instance{};
instance.LoadPlugin();
return instance;
}
@ -30,6 +135,41 @@ void DeviceContextManager::Register(const std::string &device_name, DeviceContex
}
}
// Lazily loads every libmindspore_* shared library found in the plugin
// directory next to the current shared object, recording handles in
// plugin_maps_. Idempotent via load_init_ once a scan completes.
// NOTE(review): a failed path lookup or opendir leaves load_init_ false, so
// the scan is re-attempted on every subsequent call — confirm that retry
// behavior is intended.
void DeviceContextManager::LoadPlugin() {
if (load_init_) {
return;
}
if (plugin_path_.empty() && !plugin_loader::PluginLoader::GetPluginPath(&plugin_path_)) {
MS_LOG(INFO) << "Plugin path is invalid, skip!";
return;
}
DIR *dir = opendir(plugin_path_.c_str());
if (dir == nullptr) {
MS_LOG(ERROR) << "Open plugin dir failed, plugin path:" << plugin_path_;
return;
}
struct dirent *entry;
while ((entry = readdir(dir)) != nullptr) {
// d_name is a bare file name (no directory); LoadDynamicLib filters
// non-plugin entries — presumably the loader search path (rpath) resolves
// the bare name to the plugin directory; TODO confirm.
auto plugin_file = entry->d_name;
plugin_loader::PluginLoader::LoadDynamicLib(plugin_file, &plugin_maps_);
}
(void)closedir(dir);
load_init_ = true;
}
// Closes every plugin handle recorded by LoadPlugin and resets the loader
// state so a later LoadPlugin call can rescan the plugin directory.
void DeviceContextManager::UnloadPlugin() {
  if (plugin_maps_.empty()) {
    return;
  }
  for (const auto &entry : plugin_maps_) {
    plugin_loader::PluginLoader::CloseDynamicLib(entry.first, entry.second);
  }
  plugin_maps_.clear();
  load_init_ = false;
}
void DeviceContextManager::ClearDeviceContexts() {
for (auto &iter : device_contexts_) {
MS_LOG(INFO) << "Release device " << iter.first;
@ -49,7 +189,6 @@ DeviceContext *DeviceContextManager::GetOrCreateDeviceContext(const DeviceContex
name = "GE";
device_context_key_str = "GE_0";
}
auto device_context_iter = device_contexts_.find(device_context_key_str);
if (device_context_iter != device_contexts_.end()) {
return device_context_iter->second.get();

View File

@ -28,6 +28,18 @@
#include "include/backend/visible.h"
namespace mindspore {
namespace plugin_loader {
class PluginLoader {
public:
static void LoadDynamicLib(const std::string &plugin_file, std::map<std::string, void *> *all_handles);
static void CloseDynamicLib(const std::string &dl_name, void *handle);
static bool GetPluginPath(std::string *file_path);
private:
static std::string GetDynamicLibName(const std::string &plugin_file);
};
} // namespace plugin_loader
namespace device {
using DeviceContextCreator = std::function<std::shared_ptr<DeviceContext>(const DeviceContextKey &)>;
@ -39,16 +51,22 @@ class BACKEND_EXPORT DeviceContextManager {
void UpdateDeviceContextKey(const DeviceContextKey &old_key, const DeviceContextKey &new_key);
void ClearDeviceContexts();
void WaitTaskFinishOnDevice() const;
void UnloadPlugin();
private:
DeviceContextManager() = default;
~DeviceContextManager() = default;
DISABLE_COPY_AND_ASSIGN(DeviceContextManager);
void LoadPlugin();
std::map<std::string, void *> plugin_maps_;
inline static bool load_init_;
inline static std::string plugin_path_;
// The string converted from DeviceContextKey -> DeviceContextPtr.
std::map<std::string, DeviceContextPtr> device_contexts_;
inline static std::map<std::string, DeviceContextPtr> device_contexts_;
// The name of device -> DeviceContextCreator.
std::map<std::string, DeviceContextCreator> device_context_creators_;
inline static std::map<std::string, DeviceContextCreator> device_context_creators_;
};
class DeviceContextRegister {

View File

@ -124,6 +124,7 @@ package_data = {
'*.pyd',
'*.dll',
'bin/*',
'lib/plugin/*',
'lib/*.so*',
'lib/*.a',
'lib/*.dylib*',