forked from mindspore-Ecosystem/mindspore
!40996 plugin support dynamic load
Merge pull request !40996 from liubuyu/plugin
Commit ebd91618e6

build.sh
@@ -81,6 +81,7 @@ if [[ "X$COMPILE_LITE" = "Xon" ]]; then
source mindspore/lite/build_lite.sh
else
mkdir -pv "${BUILD_PATH}/package/mindspore/lib"
+mkdir -pv "${BUILD_PATH}/package/mindspore/lib/plugin"
update_submodule

build_mindspore

@@ -68,6 +68,7 @@ set(INSTALL_BASE_DIR ".")
set(INSTALL_BIN_DIR "bin")
set(INSTALL_CFG_DIR "config")
set(INSTALL_LIB_DIR "lib")
+set(INSTALL_PLUGIN_DIR "${INSTALL_LIB_DIR}/plugin")
# set package files
install(
TARGETS _c_expression
@@ -98,7 +99,7 @@ install(
if(ENABLE_D)
install(
TARGETS mindspore_ascend
-DESTINATION ${INSTALL_LIB_DIR}
+DESTINATION ${INSTALL_PLUGIN_DIR}
COMPONENT mindspore
)
endif()
@@ -106,7 +107,7 @@ endif()
if(ENABLE_GPU)
install(
TARGETS mindspore_gpu
-DESTINATION ${INSTALL_LIB_DIR}
+DESTINATION ${INSTALL_PLUGIN_DIR}
COMPONENT mindspore
)
endif()

@@ -18,6 +18,7 @@ set(INSTALL_BASE_DIR ".")
set(INSTALL_BIN_DIR "bin")
set(INSTALL_CFG_DIR "config")
set(INSTALL_LIB_DIR "lib")
+set(INSTALL_PLUGIN_DIR "${INSTALL_LIB_DIR}/plugin")

# set package files
install(
@@ -35,7 +36,7 @@ install(
if(ENABLE_D)
install(
TARGETS mindspore_ascend
-DESTINATION ${INSTALL_LIB_DIR}
+DESTINATION ${INSTALL_PLUGIN_DIR}
COMPONENT mindspore
)
endif()
@@ -43,7 +44,7 @@ endif()
if(ENABLE_GPU)
install(
TARGETS mindspore_gpu
-DESTINATION ${INSTALL_LIB_DIR}
+DESTINATION ${INSTALL_PLUGIN_DIR}
COMPONENT mindspore
)
endif()

@@ -492,18 +492,16 @@ elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
else()
MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}")
endif()
-set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${MINDSPORE_RPATH})
+set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${ORIGIN_PATH}/lib/plugin:${ORIGIN_PATH}/..:${MINDSPORE_RPATH})

if(ENABLE_D)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/ascend)
add_subdirectory(plugin/device/ascend)
-target_link_libraries(mindspore_backend PRIVATE mindspore_ascend)
endif()

if(ENABLE_GPU)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/gpu)
add_subdirectory(plugin/device/gpu)
-target_link_libraries(mindspore_backend PRIVATE mindspore_gpu)
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Windows")

@@ -139,7 +139,7 @@ class BACKEND_EXPORT OpAdaptationInfoRegister {

static std::string GenerateKey(const std::string &op_name, const std::string &device_name, bool flag);
// key: (op_name + device_name + flag), value: <OpAdaptationInfo *>
-std::map<std::string, OpAdaptationInfo *> op_info_map_;
+inline static std::map<std::string, OpAdaptationInfo *> op_info_map_;
};

class RegisterHelper {

@@ -22,9 +22,10 @@

#include "utils/hash_map.h"
#include "common/graph_kernel/expanders/utils.h"
+#include "include/common/visible.h"

namespace mindspore::graphkernel::expanders {
-class OpDescFactory {
+class COMMON_EXPORT OpDescFactory {
public:
static OpDescFactory &Instance() {
static OpDescFactory instance = OpDescFactory();
@@ -46,7 +47,7 @@ class OpDescFactory {
void Register(const std::string &op, const RegFunc &func) { creators[op] = func; }

private:
-mindspore::HashMap<std::string, RegFunc> creators;
+inline static mindspore::HashMap<std::string, RegFunc> creators;
};

class OpDescRegister {

@@ -62,13 +62,7 @@ if(ENABLE_D)
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc"
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc"
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc"
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/pattern_engine.cc"
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/helper.cc"
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/node_pass.cc"
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/visit.cc"
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/kernel/kernel_build_info.cc"
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/kernel_info.cc")
+"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc")
endif()

if(NOT ENABLE_TESTCASES AND NOT BUILD_LITE)
@@ -76,9 +70,15 @@ if(NOT ENABLE_TESTCASES AND NOT BUILD_LITE)
set(MSLIB_SRC ${MSLIB_SRC} ${CMAKE_SOURCE_DIR}/mindspore/core/utils/status.cc)
endif()

-if(ENABLE_D OR ENABLE_ACL)
+if((ENABLE_D OR ENABLE_ACL) AND NOT BUILD_LITE)
list(APPEND MSLIB_SRC
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc")
+"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc"
+"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/pattern_engine.cc"
+"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/helper.cc"
+"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/node_pass.cc"
+"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/visit.cc"
+"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/kernel/kernel_build_info.cc"
+"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/kernel_info.cc")
endif()

if(ENABLE_GPU)
@@ -90,8 +90,6 @@ if(BUILD_LITE)
file(GLOB_RECURSE ACL_REMOVE_SRC ${CMAKE_CURRENT_SOURCE_DIR}
"model/acl/acl_vm/*.cc"
)
-list(REMOVE_ITEM MSLIB_SRC
-"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc")
list(REMOVE_ITEM MSLIB_SRC "${CMAKE_CURRENT_SOURCE_DIR}/akg_kernel_register.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/model/acl/acl_model_multi.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/model/acl/acl_model.cc"
@@ -104,7 +102,7 @@ endif()

add_library(mindspore_shared_lib_obj OBJECT ${MSLIB_SRC})
add_library(mindspore_shared_lib SHARED $<TARGET_OBJECTS:mindspore_shared_lib_obj>)
-if(BUILD_LITE)
+if(BUILD_LITE OR ENABLE_ACL)
target_link_libraries(mindspore_shared_lib PRIVATE $<TARGET_OBJECTS:_mindspore_transform_graph_ir_obj>)
add_dependencies(mindspore_shared_lib _mindspore_transform_graph_ir_obj)
elseif(MODE_ASCEND_ACL)
@@ -158,10 +156,11 @@ if(ENABLE_D OR ENABLE_ACL)
find_library(libaicore_utils libaicore_utils.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(libaicpu_engine_common libaicpu_engine_common.so ${ASCEND_CANN_RUNTIME_PATH}
${ASCEND_TOOLKIT_RUNTIME_PATH})
+find_library(GE_RUNNER ge_runner ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})

target_link_libraries(mindspore_shared_lib PRIVATE -Wl,--no-as-needed graph ${ge_compiler}
${acl_retr} ${acl_cblas} ${acl_dvpp} ${acl_runtime} ${libplatform} ${libcompress} ${libopskernel}
-${libaicore_utils} ${libaicpu_engine_common} ${acl})
+${libaicore_utils} ${libaicpu_engine_common} ${acl} ${GE_RUNNER})
add_dependencies(mindspore_shared_lib_obj graph)
add_dependencies(mindspore_shared_lib graph)
endif()

@@ -132,9 +132,9 @@ class BACKEND_EXPORT DataQueueMgr {
bool dynamic_shape_{false};
size_t default_capacity_{2};

-std::map<std::string, std::shared_ptr<BlockingQueue>> name_queue_map_;
-std::map<std::string, DataQueueCreator> data_queue_creator_map_ = {}; // key: device name, value: DataQueueCreator
+inline static std::map<std::string, std::shared_ptr<BlockingQueue>> name_queue_map_;
+// key: device name, value: DataQueueCreator
+inline static std::map<std::string, DataQueueCreator> data_queue_creator_map_ = {};

HANDLER_DEFINE(bool, DestoryTdtHandle);
};

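Several of the registries touched in this change (op_info_map_, creators, name_queue_map_, and the maps further below) are switched to C++17 inline static data members. A minimal, self-contained reminder of what that construct does (illustrative only, not MindSpore code):

#include <iostream>
#include <map>
#include <string>

struct Registry {
  // C++17 inline static member: the in-class declaration is also the
  // definition, so no out-of-line "std::map<...> Registry::table;" is needed,
  // and every translation unit that includes this header refers to one object.
  inline static std::map<std::string, int> table = {};
};

int main() {
  Registry::table["GPU"] = 1;
  std::cout << Registry::table.size() << std::endl;  // prints 1
  return 0;
}
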
@@ -32,11 +32,7 @@
#include "include/common/utils/parallel_context.h"
#include "frontend/parallel/costmodel_context.h"
#include "frontend/optimizer/ad/dfunctor.h"
-#ifdef ENABLE_GPU_COLLECTIVE
-#include "plugin/device/gpu/hal/device/distribution/collective_init.h"
-#else
-#include "runtime/collective/collective_fake_init.h"
-#endif
+#include "runtime/collective/gpu_collective_init.h"
#if ((defined ENABLE_CPU) && (!defined _WIN32))
#include "ps/util.h"
#endif
@@ -404,25 +400,15 @@ PYBIND11_MODULE(_c_expression, m) {
(void)py::class_<OpLib, std::shared_ptr<OpLib>>(m, "Oplib")
.def(py::init())
.def_static("reg_op", &OpLib::RegOp, "Register op info.");
-#ifdef ENABLE_GPU_COLLECTIVE
-(void)m.def("init_gpu_collective", &mindspore::device::gpu::CollectiveInitializer::InitCollective,
+(void)m.def("init_gpu_collective", &mindspore::device::gpu::GPUCollectiveInitializer::InitCollective,
"Init gpu collective communication mode.");
-(void)m.def("finalize_gpu_collective", &mindspore::device::gpu::CollectiveInitializer::FinalizeCollective,
+(void)m.def("finalize_gpu_collective", &mindspore::device::gpu::GPUCollectiveInitializer::FinalizeCollective,
"Finalize gpu collective communication mode.");
-(void)m.def("get_rank_id", &mindspore::device::gpu::CollectiveInitializer::GetRankID,
+(void)m.def("get_rank_id", &mindspore::device::gpu::GPUCollectiveInitializer::GetRankID,
"Finalize gpu collective communication mode.");
-(void)m.def("get_rank_size", &mindspore::device::gpu::CollectiveInitializer::GetRankSize,
+(void)m.def("get_rank_size", &mindspore::device::gpu::GPUCollectiveInitializer::GetRankSize,
"Finalize gpu collective communication mode.");
-#else
-(void)m.def("init_gpu_collective", &mindspore::device::gpu::CollectiveFakeInitializer::InitCollective,
-"Init gpu collective communication mode.");
-(void)m.def("finalize_gpu_collective", &mindspore::device::gpu::CollectiveFakeInitializer::FinalizeCollective,
-"Finalize gpu collective communication mode.");
-(void)m.def("get_rank_id", &mindspore::device::gpu::CollectiveFakeInitializer::GetRankID,
-"Finalize gpu collective communication mode.");
-(void)m.def("get_rank_size", &mindspore::device::gpu::CollectiveFakeInitializer::GetRankSize,
-"Finalize gpu collective communication mode.");
-#endif

(void)py::class_<CollectiveManager, std::shared_ptr<CollectiveManager>>(m, "CollectiveManager")
.def_static("get_instance", &CollectiveManager::instance, "Get collective manager instance.")

@@ -1603,6 +1603,7 @@ void FinalizeHccl() {
session::ExecutorManager::Instance().Clear();
device::DeviceContextManager::GetInstance().ClearDeviceContexts();
device::KernelRuntimeManager::Instance().ClearRuntimeResource();
+device::DeviceContextManager::GetInstance().UnloadPlugin();
}

uint32_t GetHcclRankId() {
@@ -1729,8 +1730,7 @@ void MemoryRecycle() {
FuncGraphLoopBreaker::Inst().BreakLoop();
}

-void ClearResAtexit() {
-MS_LOG(INFO) << "Pipeline clear all resource";
+void ClearResPart1() {
runtime::OpExecutor::GetInstance().WorkerJoin();
// When the python process exits, the kernels on the device may not have finished executing.
device::KernelRuntimeManager::Instance().WaitTaskFinishOnDevice();
@@ -1753,7 +1753,6 @@
MS_LOG(INFO) << "End Finalize StreamSynchronizer...";

(void)distributed::collective::CollectiveManager::instance()->Finalize();

PrimitivePy::ClearHookRes();
ad::g_k_prims.clear();
ad::ClearKPynativeCellStaticRes();
@@ -1764,7 +1763,9 @@
pipeline::GetAttrMap().clear();
pipeline::GraphExecutorPy::ClearRes();
pipeline::ReclaimOptimizer();
+}

+void ClearResPart2() {
MS_LOG(INFO) << "Start clear PyNativeExecutor...";
pynative::PyNativeExecutor::GetInstance()->ClearRes();
MS_LOG(INFO) << "End clear PyNativeExecutor.";
@@ -1792,7 +1793,6 @@
ConfigManager::GetInstance().ResetIterNum();
MS_LOG(INFO) << "End clear ConfigManager.";
#endif

MS_LOG(INFO) << "Start clear device context...";
device::DeviceContextManager::GetInstance().ClearDeviceContexts();
MS_LOG(INFO) << "End clear device context.";
@@ -1817,6 +1817,9 @@
Debugger::GetInstance()->Reset();
#endif
g_args_cache.clear();
+}
+
+void ClearResPart3() {
// clean static variable to prevent from crash. As static variable is released after
// Python threads is released.
MS_LOG(INFO) << "Start clear ClearObjectCache...";
@@ -1842,6 +1845,17 @@
MS_LOG(INFO) << "Start clear ProtobufLibrary...";
google::protobuf::ShutdownProtobufLibrary();
MS_LOG(INFO) << "End clear ProtobufLibrary...";
+
+MS_LOG(INFO) << "Start unload dynamic lib...";
+device::DeviceContextManager::GetInstance().UnloadPlugin();
+MS_LOG(INFO) << "End unload dynamic lib...";
}
+
+void ClearResAtexit() {
+MS_LOG(INFO) << "Pipeline clear all resource";
+ClearResPart1();
+ClearResPart2();
+ClearResPart3();
+}

py::bytes PyEncrypt(char *plain_data, size_t plain_len, char *key, size_t key_len, const std::string &enc_mode) {

@@ -30,6 +30,7 @@

namespace mindspore {
namespace kernel {
+constexpr size_t kPluginSuffix = 6;
bool AicpuOpKernelLoad::GetBinaryFileName(const std::string &so_name, const std::string &bin_folder_path,
std::string *bin_file_path) {
MS_EXCEPTION_IF_NULL(bin_file_path);
@@ -105,7 +106,7 @@ bool AicpuOpKernelLoad::GetSoNeedLoadPath(std::string *file_path) const {
MS_LOG(ERROR) << "Current path [" << cust_kernel_so_path << "] is invalid.";
return false;
}
-auto real_cust_kernel_so_path = cust_kernel_so_path.substr(0, pos) + "/";
+auto real_cust_kernel_so_path = cust_kernel_so_path.substr(0, pos - kPluginSuffix);
if (real_cust_kernel_so_path.size() > PATH_MAX) {
MS_LOG(ERROR) << "Current path [" << real_cust_kernel_so_path << "] is too long.";
return false;

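A note on the kPluginSuffix arithmetic above: the aicpu cust-kernel library now sits under lib/plugin/, so after locating the final path separator the code strips the six characters of "plugin" as well, landing back in lib/. A small illustrative check follows; the path is hypothetical and it assumes pos is the index of the last '/', which is not visible in this hunk:

#include <cassert>
#include <string>

int main() {
  const std::string cust_kernel_so_path = "/opt/mindspore/lib/plugin/libmindspore_ascend.so";  // hypothetical layout
  constexpr size_t kPluginSuffix = 6;  // length of "plugin"
  auto pos = cust_kernel_so_path.find_last_of('/');
  // Old behaviour: substr(0, pos) + "/" -> "/opt/mindspore/lib/plugin/"
  // New behaviour: also drop the "plugin" directory component.
  auto real_cust_kernel_so_path = cust_kernel_so_path.substr(0, pos - kPluginSuffix);
  assert(real_cust_kernel_so_path == "/opt/mindspore/lib/");
  return 0;
}
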
@@ -27,7 +27,7 @@ add_library(mindspore_gpu SHARED ${GPU_SUB_OBJECTS_SRC})
target_link_libraries(mindspore_gpu PUBLIC mindspore_backend_common)
target_link_libraries(mindspore_gpu PRIVATE mindspore_core mindspore_common proto_input mindspore::protobuf)
target_link_libraries(mindspore_gpu PRIVATE securec)
-set_target_properties(mindspore_gpu PROPERTIES INSTALL_RPATH $ORIGIN)
+set_target_properties(mindspore_gpu PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
target_link_libraries(mindspore_gpu PRIVATE mindspore::dnnl mindspore::mkldnn nnacl)
target_link_libraries(mindspore_gpu PRIVATE mindspore::ssl mindspore::crypto)
target_link_libraries(mindspore_gpu PRIVATE mindspore::event mindspore::event_pthreads

@@ -18,6 +18,7 @@
#include <cuda.h>
#include <vector>
#include <string>
+#include "plugin/device/gpu/hal/device/distribution/collective_init.h"

namespace mindspore {
namespace device {
@@ -60,6 +61,18 @@ int GPUDeprecatedInterface::GetGPUMultiProcessorCount() {
}
return sm_count;
}
+
+void GPUDeprecatedInterface::GPUInitCollective() { CollectiveInitializer::InitCollective(); }
+
+void GPUDeprecatedInterface::GPUFinalizeCollective() { CollectiveInitializer::FinalizeCollective(); }
+
+uint32_t GPUDeprecatedInterface::GPUGetRankID(const std::string &group_name) {
+return CollectiveInitializer::GetRankID(group_name);
+}
+
+uint32_t GPUDeprecatedInterface::GPUGetRankSize(const std::string &group_name) {
+return CollectiveInitializer::GetRankSize(group_name);
+}
} // namespace gpu
} // namespace device
} // namespace mindspore

@@ -35,6 +35,10 @@ class GPUDeprecatedInterface : public DeprecatedInterface {
int GetGPUCapabilityMajor() override;
int GetGPUCapabilityMinor() override;
int GetGPUMultiProcessorCount() override;
+void GPUInitCollective() override;
+void GPUFinalizeCollective() override;
+uint32_t GPUGetRankID(const std::string &group_name) override;
+uint32_t GPUGetRankSize(const std::string &group_name) override;
};
} // namespace gpu
} // namespace device

@@ -57,7 +57,7 @@ struct OpInfo {
uint32_t pid;
};

-class ProfilerManager {
+class BACKEND_EXPORT ProfilerManager {
public:
static std::shared_ptr<ProfilerManager> &GetInstance();
ProfilerManager() = default;
@@ -75,7 +75,7 @@ class ProfilerManager {
bool is_dynamic_shape_net_ = 0;
};

-class Profiler {
+class BACKEND_EXPORT Profiler {
public:
static std::shared_ptr<Profiler> GetInstance(const std::string &name) noexcept;
static bool Register(const std::string &name, const std::shared_ptr<Profiler> &instance);
@@ -125,7 +125,7 @@ class Profiler {
bool is_parallel_strategy = false;

private:
-BACKEND_EXPORT inline static std::map<std::string, std::shared_ptr<Profiler>> instance_map_ = {};
+inline static std::map<std::string, std::shared_ptr<Profiler>> instance_map_ = {};
};
} // namespace profiler
} // namespace mindspore

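ProfilerManager and Profiler (like OpDescFactory with COMMON_EXPORT earlier) are now exported at the class level rather than on an individual static member, presumably so the dynamically loaded device libraries can see these types across the shared-object boundary. The actual macros live in include/backend/visible.h and include/common/visible.h; the sketch below only shows the usual shape of such macros and is an assumption, not a copy of those headers:

// Typical visibility macro layout (illustrative names, not the MindSpore ones).
#if defined(_WIN32) || defined(_WIN64)
#define MY_BACKEND_EXPORT __declspec(dllexport)
#else
#define MY_BACKEND_EXPORT __attribute__((visibility("default")))
#endif

// Marking the class keeps its methods, vtable and typeinfo visible to a
// dlopen'ed plugin built against the same headers.
class MY_BACKEND_EXPORT Example {
 public:
  int Value() const { return 42; }
};

int main() { return Example().Value() == 42 ? 0 : 1; }
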
@@ -1,44 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "runtime/collective/collective_fake_init.h"
#include "utils/log_adapter.h"

namespace mindspore {
namespace device {
namespace gpu {
void CollectiveFakeInitializer::InitCollective() {
MS_LOG(EXCEPTION) << "You are trying to call 'init('nccl')', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}

void CollectiveFakeInitializer::FinalizeCollective() {
MS_LOG(EXCEPTION) << "You are trying to call 'init('nccl')', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}

uint32_t CollectiveFakeInitializer::GetRankID(const std::string &) {
MS_LOG(EXCEPTION) << "You are trying to call 'GetRankID', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}

uint32_t CollectiveFakeInitializer::GetRankSize(const std::string &) {
MS_LOG(EXCEPTION) << "You are trying to call 'GetRankSize', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
} // namespace gpu
} // namespace device
} // namespace mindspore

@@ -0,0 +1,89 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "runtime/collective/gpu_collective_init.h"
#include "runtime/hardware/device_context_manager.h"
#include "utils/log_adapter.h"

namespace mindspore {
namespace device {
namespace gpu {
void GPUCollectiveInitializer::InitCollective() {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
if (device_target != kGPUDevice) {
MS_LOG(EXCEPTION) << "You are trying to call 'init('nccl')', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
{kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
MS_EXCEPTION_IF_NULL(device_context);
auto deprecated_ptr = device_context->GetDeprecatedInterface();
MS_EXCEPTION_IF_NULL(deprecated_ptr);
deprecated_ptr->GPUInitCollective();
}

void GPUCollectiveInitializer::FinalizeCollective() {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
if (device_target != kGPUDevice) {
MS_LOG(EXCEPTION) << "You are trying to call 'finalize('nccl')', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
{kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
MS_EXCEPTION_IF_NULL(device_context);
auto deprecated_ptr = device_context->GetDeprecatedInterface();
MS_EXCEPTION_IF_NULL(deprecated_ptr);
deprecated_ptr->GPUFinalizeCollective();
}

uint32_t GPUCollectiveInitializer::GetRankID(const std::string &group_name) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
if (device_target != kGPUDevice) {
MS_LOG(EXCEPTION) << "You are trying to call 'GetRankID', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
{kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
MS_EXCEPTION_IF_NULL(device_context);
auto deprecated_ptr = device_context->GetDeprecatedInterface();
MS_EXCEPTION_IF_NULL(deprecated_ptr);
return deprecated_ptr->GPUGetRankID(group_name);
}

uint32_t GPUCollectiveInitializer::GetRankSize(const std::string &group_name) {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
auto device_target = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
if (device_target != kGPUDevice) {
MS_LOG(EXCEPTION) << "You are trying to call 'GetRankSize', Please check "
"this MindSpore package is GPU version and built with NCCL.";
}
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
{kGPUDevice, ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID)});
MS_EXCEPTION_IF_NULL(device_context);
auto deprecated_ptr = device_context->GetDeprecatedInterface();
MS_EXCEPTION_IF_NULL(deprecated_ptr);
return deprecated_ptr->GPUGetRankSize(group_name);
}
} // namespace gpu
} // namespace device
} // namespace mindspore

@@ -14,8 +14,8 @@
* limitations under the License.
*/

-#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_DISTRIBUTION_COLLECTIVE_FAKE_INIT_H_
-#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_DISTRIBUTION_COLLECTIVE_FAKE_INIT_H_
+#ifndef MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_
+#define MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_

#include <string>
#include "include/backend/visible.h"
@@ -23,12 +23,12 @@
namespace mindspore {
namespace device {
namespace gpu {
-class BACKEND_EXPORT CollectiveFakeInitializer {
+class BACKEND_EXPORT GPUCollectiveInitializer {
public:
-CollectiveFakeInitializer() = default;
-~CollectiveFakeInitializer() = default;
-CollectiveFakeInitializer(CollectiveFakeInitializer const &) = delete;
-CollectiveFakeInitializer &operator=(const CollectiveFakeInitializer &) = delete;
+GPUCollectiveInitializer() = default;
+~GPUCollectiveInitializer() = default;
+GPUCollectiveInitializer(GPUCollectiveInitializer const &) = delete;
+GPUCollectiveInitializer &operator=(const GPUCollectiveInitializer &) = delete;
static void InitCollective();
static void FinalizeCollective();
static uint32_t GetRankID(const std::string &group_name);
@@ -38,4 +38,4 @@ class BACKEND_EXPORT CollectiveFakeInitializer {
} // namespace device
} // namespace mindspore

-#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_DISTRIBUTION_COLLECTIVE_FAKE_INIT_H_
+#endif // MINDSPORE_CCSRC_RUNTIME_COLLECTIVE_GPU_COLLECTIVE_INIT_H_

@@ -49,8 +49,8 @@ class BACKEND_EXPORT KernelRuntimeManager {
~KernelRuntimeManager() = default;
DISABLE_COPY_AND_ASSIGN(KernelRuntimeManager);
std::string GetDeviceKey(const std::string &device_name, uint32_t device_id) const;
-std::map<std::string, std::shared_ptr<KernelRuntime> > runtime_map_;
-std::map<std::string, KernelRuntimeCreator> runtime_creators_;
+inline static std::map<std::string, std::shared_ptr<KernelRuntime> > runtime_map_;
+inline static std::map<std::string, KernelRuntimeCreator> runtime_creators_;
std::mutex lock_;
};

@@ -58,6 +58,10 @@ class DeprecatedInterface {
virtual int GetGPUCapabilityMajor() { return -1; }
virtual int GetGPUCapabilityMinor() { return -1; }
virtual int GetGPUMultiProcessorCount() { return -1; }
+virtual void GPUInitCollective() {}
+virtual void GPUFinalizeCollective() {}
+virtual uint32_t GPUGetRankID(const std::string &group_name) { return 0; }
+virtual uint32_t GPUGetRankSize(const std::string &group_name) { return 0; }
};
} // namespace device
} // namespace mindspore

@@ -15,12 +15,117 @@
*/

#include "runtime/hardware/device_context_manager.h"
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#endif
#include <dirent.h>
#include <algorithm>
#include <string>
#include <fstream>
#include "utils/ms_context.h"
#include "utils/dlopen_macro.h"

namespace mindspore {
namespace plugin_loader {
void PluginLoader::LoadDynamicLib(const std::string &plugin_file, std::map<std::string, void *> *all_handles) {
MS_EXCEPTION_IF_NULL(all_handles);
void *handle = nullptr;
std::string err_msg;
if (plugin_file.find("libmindspore_") == std::string::npos) {
return;
}
auto so_name = GetDynamicLibName(plugin_file);
#if defined(_WIN32) || defined(_WIN64)
handle = LoadLibrary(plugin_file.c_str());
err_msg = std::to_string(GetLastError());
#else
handle = dlopen(plugin_file.c_str(), RTLD_NOW | RTLD_LOCAL);
err_msg = GetDlErrorMsg();
#endif
if (handle == nullptr) {
MS_LOG(DEBUG) << "Load dynamic lib: " << so_name << " failed. " << err_msg;
return;
}
(*all_handles)[so_name] = handle;
}

void PluginLoader::CloseDynamicLib(const std::string &dl_name, void *handle) {
#if defined(_WIN32) || defined(_WIN64)
if (!FreeLibrary(static_cast<HMODULE>(handle))) {
MS_LOG(EXCEPTION) << "Closing dynamic lib: " + dl_name + " handle failed. Error: " + std::to_string(GetLastError());
}
#else
if (dlclose(handle) != 0) {
MS_LOG(EXCEPTION) << "Closing dynamic lib: " << dl_name << "failed, error message: " << GetDlErrorMsg();
}
#endif
}

std::string PluginLoader::GetDynamicLibName(const std::string &plugin_file) {
auto pos = plugin_file.rfind('.');
if (pos == std::string::npos) {
MS_LOG(WARNING) << "Invalid plugin file " << plugin_file;
return "unknown_name";
}
return plugin_file.substr(0, pos);
}

bool PluginLoader::GetPluginPath(std::string *file_path) {
MS_EXCEPTION_IF_NULL(file_path);
std::string cur_so_path;
#if !defined(_WIN32) && !defined(_WIN64)
Dl_info dl_info;
if (dladdr(reinterpret_cast<void *>(PluginLoader::GetPluginPath), &dl_info) == 0) {
MS_LOG(INFO) << "Get dladdr error";
return false;
}
cur_so_path = dl_info.dli_fname;
#else
HMODULE hModule = nullptr;
if (GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT | GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
(LPCSTR)PluginLoader::GetPluginPath, &hModule) == 0) {
MS_LOG(INFO) << "Get GetModuleHandleEx failed.";
return false;
}
char szPath[MAX_PATH];
if (GetModuleFileName(hModule, szPath, sizeof(szPath)) == 0) {
MS_LOG(INFO) << "Get GetModuleHandleEx failed.";
return false;
}
cur_so_path = std::string(szPath);
#endif
auto pos = cur_so_path.find_last_of('/');
if (cur_so_path.empty() || pos == std::string::npos) {
MS_LOG(INFO) << "Current so path empty or the path [" << cur_so_path << "] is invalid.";
return false;
}
auto plugin_so_path = cur_so_path.substr(0, pos) + "/plugin";
if (plugin_so_path.size() >= PATH_MAX) {
MS_LOG(INFO) << "Current path [" << plugin_so_path << "] is invalid.";
return false;
}
char real_path_mem[PATH_MAX] = {0};
#if defined(_WIN32) || defined(_WIN64)
if (_fullpath(real_path_mem, common::SafeCStr(plugin_so_path), PATH_MAX) == nullptr) {
MS_LOG(INFO) << "Plugin path is invalid: [" << plugin_so_path << "], skip!";
return false;
}
#else
if (realpath(common::SafeCStr(plugin_so_path), real_path_mem) == nullptr) {
MS_LOG(INFO) << "Plugin path is invalid: [" << plugin_so_path << "], skip!";
return false;
}
#endif
*file_path = std::string(real_path_mem);
return true;
}
} // namespace plugin_loader

namespace device {
DeviceContextManager &DeviceContextManager::GetInstance() {
static DeviceContextManager instance{};
instance.LoadPlugin();
return instance;
}

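LoadDynamicLib above only dlopen()s files whose names contain "libmindspore_" and keeps the handles so UnloadPlugin can close them later. The following self-contained sketch mirrors that dlopen/dlclose pattern with the same flags; it opens the C math library instead of a MindSpore plugin so it can run on any glibc Linux system (link with -ldl on older toolchains):

#include <dlfcn.h>
#include <iostream>

int main() {
  // Same flags as PluginLoader::LoadDynamicLib: resolve all symbols now, and
  // do not make the library's symbols available for later global lookups.
  void *handle = dlopen("libm.so.6", RTLD_NOW | RTLD_LOCAL);
  if (handle == nullptr) {
    std::cerr << "dlopen failed: " << dlerror() << std::endl;
    return 1;
  }
  // A real MindSpore plugin is never queried with dlsym here; simply loading
  // it runs its static initializers, which register the device context.
  auto cosine = reinterpret_cast<double (*)(double)>(dlsym(handle, "cos"));
  if (cosine != nullptr) {
    std::cout << "cos(0) = " << cosine(0.0) << std::endl;
  }
  if (dlclose(handle) != 0) {
    std::cerr << "dlclose failed: " << dlerror() << std::endl;
    return 1;
  }
  return 0;
}
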
@@ -30,6 +135,41 @@ void DeviceContextManager::Register(const std::string &device_name, DeviceContex
}
}

void DeviceContextManager::LoadPlugin() {
if (load_init_) {
return;
}
if (plugin_path_.empty() && !plugin_loader::PluginLoader::GetPluginPath(&plugin_path_)) {
MS_LOG(INFO) << "Plugin path is invalid, skip!";
return;
}
DIR *dir = opendir(plugin_path_.c_str());
if (dir == nullptr) {
MS_LOG(ERROR) << "Open plugin dir failed, plugin path:" << plugin_path_;
return;
}
struct dirent *entry;
while ((entry = readdir(dir)) != nullptr) {
auto plugin_file = entry->d_name;
plugin_loader::PluginLoader::LoadDynamicLib(plugin_file, &plugin_maps_);
}
(void)closedir(dir);
load_init_ = true;
}

void DeviceContextManager::UnloadPlugin() {
if (plugin_maps_.empty()) {
return;
}
auto iter = plugin_maps_.begin();
while (iter != plugin_maps_.end()) {
plugin_loader::PluginLoader::CloseDynamicLib(iter->first, iter->second);
iter++;
}
plugin_maps_.clear();
load_init_ = false;
}

void DeviceContextManager::ClearDeviceContexts() {
for (auto &iter : device_contexts_) {
MS_LOG(INFO) << "Release device " << iter.first;
@@ -49,7 +189,6 @@ DeviceContext *DeviceContextManager::GetOrCreateDeviceContext(const DeviceContex
name = "GE";
device_context_key_str = "GE_0";
}

auto device_context_iter = device_contexts_.find(device_context_key_str);
if (device_context_iter != device_contexts_.end()) {
return device_context_iter->second.get();

@@ -28,6 +28,18 @@
#include "include/backend/visible.h"

namespace mindspore {
+namespace plugin_loader {
+class PluginLoader {
+public:
+static void LoadDynamicLib(const std::string &plugin_file, std::map<std::string, void *> *all_handles);
+static void CloseDynamicLib(const std::string &dl_name, void *handle);
+static bool GetPluginPath(std::string *file_path);
+
+private:
+static std::string GetDynamicLibName(const std::string &plugin_file);
+};
+} // namespace plugin_loader
+
namespace device {
using DeviceContextCreator = std::function<std::shared_ptr<DeviceContext>(const DeviceContextKey &)>;
@@ -39,16 +51,22 @@ class BACKEND_EXPORT DeviceContextManager {
void UpdateDeviceContextKey(const DeviceContextKey &old_key, const DeviceContextKey &new_key);
void ClearDeviceContexts();
void WaitTaskFinishOnDevice() const;
+void UnloadPlugin();

private:
DeviceContextManager() = default;
~DeviceContextManager() = default;
DISABLE_COPY_AND_ASSIGN(DeviceContextManager);
+void LoadPlugin();
+
+std::map<std::string, void *> plugin_maps_;
+inline static bool load_init_;
+inline static std::string plugin_path_;

// The string converted from DeviceContextKey -> DeviceContextPtr.
-std::map<std::string, DeviceContextPtr> device_contexts_;
+inline static std::map<std::string, DeviceContextPtr> device_contexts_;
// The name of device -> DeviceContextCreator.
-std::map<std::string, DeviceContextCreator> device_context_creators_;
+inline static std::map<std::string, DeviceContextCreator> device_context_creators_;
};

class DeviceContextRegister {

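The piece that makes dlopen() alone sufficient is registration at load time: each plugin library contains a DeviceContextRegister-style global whose constructor hands a creator to DeviceContextManager::Register while the library is being loaded. The toy program below reproduces that pattern with invented names (ToyRegister, ToyDeviceContext); it is a sketch of the mechanism, not the actual MindSpore API:

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct ToyDeviceContext {
  virtual ~ToyDeviceContext() = default;
  virtual std::string Name() const = 0;
};

using ToyCreator = std::function<std::shared_ptr<ToyDeviceContext>()>;

// Stand-in for the manager: a device-name -> creator map filled during load.
std::map<std::string, ToyCreator> &Registry() {
  static std::map<std::string, ToyCreator> registry;
  return registry;
}

// Stand-in for DeviceContextRegister: constructing a global instance of this
// type registers the creator, and globals in a shared object are constructed
// as a side effect of dlopen().
struct ToyRegister {
  ToyRegister(const std::string &name, ToyCreator creator) { Registry()[name] = std::move(creator); }
};

// --- In a real build this block would live inside libmindspore_gpu.so. ---
struct ToyGpuContext : ToyDeviceContext {
  std::string Name() const override { return "GPU"; }
};
static ToyRegister g_gpu_register("GPU", [] { return std::make_shared<ToyGpuContext>(); });
// --------------------------------------------------------------------------

int main() {
  // Mirrors GetOrCreateDeviceContext: look the device up by name and create it.
  auto it = Registry().find("GPU");
  if (it == Registry().end()) {
    std::cerr << "GPU plugin not loaded" << std::endl;
    return 1;
  }
  std::cout << "created context: " << it->second()->Name() << std::endl;
  return 0;
}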