diff --git a/cmake/package.cmake b/cmake/package.cmake
index 979339e0065..dff5ccf3e80 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -95,6 +95,22 @@ install(
     COMPONENT mindspore
 )
+if(ENABLE_D)
+    install(
+        TARGETS mindspore_ascend
+        DESTINATION ${INSTALL_LIB_DIR}
+        COMPONENT mindspore
+    )
+endif()
+
+if(ENABLE_GPU)
+    install(
+        TARGETS mindspore_gpu
+        DESTINATION ${INSTALL_LIB_DIR}
+        COMPONENT mindspore
+    )
+endif()
+
 if(USE_GLOG)
     install(FILES ${glog_LIBPATH}/libmindspore_glog.so.0.4.0 DESTINATION ${INSTALL_LIB_DIR}
             RENAME libmindspore_glog.so.0 COMPONENT mindspore)
diff --git a/cmake/package_tar.cmake b/cmake/package_tar.cmake
index df4aeeff80a..d18150d120c 100644
--- a/cmake/package_tar.cmake
+++ b/cmake/package_tar.cmake
@@ -32,6 +32,22 @@ install(
     COMPONENT mindspore
 )
+if(ENABLE_D)
+    install(
+        TARGETS mindspore_ascend
+        DESTINATION ${INSTALL_LIB_DIR}
+        COMPONENT mindspore
+    )
+endif()
+
+if(ENABLE_GPU)
+    install(
+        TARGETS mindspore_gpu
+        DESTINATION ${INSTALL_LIB_DIR}
+        COMPONENT mindspore
+    )
+endif()
+
 if(USE_GLOG)
     file(GLOB_RECURSE GLOG_LIB_LIST ${glog_LIBPATH}/libmindspore_glog*)
     install(
diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index 62fe1e0ee74..d47e0f6b6be 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -14,9 +14,6 @@ set(FBS_FILES
 ms_build_flatbuffers(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}../../schema generated_fbs_files ${SERVER_FLATBUFFER_OUTPUT})
 
 if(ENABLE_D OR ENABLE_ACL)
-    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/lib64)
-    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/lib64)
-    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/lib64)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
     set(MINDSPORE_RPATH
@@ -24,9 +21,77 @@ if(ENABLE_D OR ENABLE_ACL)
     set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
 endif()
 
-if(ENABLE_D)
-    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/ascend/kernel/aicpu/aicpu_ops)
-    add_subdirectory(plugin/device/ascend/kernel/aicpu/aicpu_ops)
+if(ENABLE_GPU)
+    find_package(CUDA REQUIRED)
+    find_package(Threads)
+    if(${CUDA_VERSION} VERSION_LESS ${MS_REQUIRE_CUDA_VERSION})
+        message(FATAL_ERROR "The minimum CUDA version ${MS_REQUIRE_CUDA_VERSION} is required, \
+                but only CUDA ${CUDA_VERSION} found.")
+    endif()
+    enable_language(CUDA)
+    if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "")
+        if(DEFINED ENV{CUDA_HOME} AND NOT $ENV{CUDA_HOME} STREQUAL "")
+            set(CUDA_PATH $ENV{CUDA_HOME})
+        else()
+            set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
+        endif()
+    endif()
+
+    if(DEFINED ENV{CUDNN_HOME} AND NOT $ENV{CUDNN_HOME} STREQUAL "")
+        set(CUDNN_INCLUDE_DIR $ENV{CUDNN_HOME}/include)
+        set(CUDNN_LIBRARY_DIR $ENV{CUDNN_HOME}/lib64)
+        find_path(CUDNN_INCLUDE_PATH cudnn.h HINTS ${CUDNN_INCLUDE_DIR} NO_DEFAULT_PATH)
+        find_library(CUDNN_LIBRARY_PATH "cudnn" HINTS ${CUDNN_LIBRARY_DIR} NO_DEFAULT_PATH)
+        find_library(CUBLAS_LIBRARY_PATH "cublas" HINTS ${CUDNN_LIBRARY_DIR})
+        if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
+            message(FATAL_ERROR "Failed to find cudnn header file, please set environment variable CUDNN_HOME to \
+                cudnn installation position.")
+        endif()
+        if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
+            message(FATAL_ERROR "Failed to find cudnn library file, please set environment variable CUDNN_HOME to \
+                cudnn installation position.")
+        endif()
+    else()
+        list(APPEND CMAKE_PREFIX_PATH ${CUDA_TOOLKIT_ROOT_DIR})
+        find_path(CUDNN_INCLUDE_PATH cudnn.h PATH_SUFFIXES cuda/include include cuda)
+        find_library(CUDNN_LIBRARY_PATH "cudnn" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu)
+        find_library(CUBLAS_LIBRARY_PATH "cublas" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu)
+        if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
+            message(FATAL_ERROR "Failed to find cudnn header file, if cudnn library is not installed, please put \
+                cudnn header file in cuda include path or user include path(eg. /usr/local/cuda/include; \
+                /usr/local/include; /usr/include), if cudnn library is installed in other position, please \
+                set environment variable CUDNN_HOME to cudnn installation position, there should be cudnn.h \
+                in {CUDNN_HOME}/include.")
+        endif()
+        if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
+            message(FATAL_ERROR "Failed to find cudnn library file, if cudnn library is not installed, please put \
+                cudnn library file in cuda library path or user library path(eg. /usr/local/cuda/lib64; \
+                /usr/local/lib64; /usr/lib64; /usr/local/lib; /usr/lib), if cudnn library is installed in other \
+                position, please set environment variable CUDNN_HOME to cudnn installation position, there should \
+                be cudnn library file in {CUDNN_HOME}/lib64.")
+        endif()
+    endif()
+
+    if(NOT CUPTI_INCLUDE_DIRS OR CUPTI_INCLUDE_DIRS STREQUAL "")
+        set(CUPTI_INCLUDE_DIRS ${CUDA_PATH}/extras/CUPTI/include)
+    endif()
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:${CUDA_PATH}/lib64)
+    message("CUDA_PATH: ${CUDA_PATH}")
+    message("CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
+    message("CUDNN_INCLUDE_PATH: ${CUDNN_INCLUDE_PATH}")
+    message("CUDNN_LIBRARY_PATH: ${CUDNN_LIBRARY_PATH}")
+    message("CUBLAS_LIBRARY_PATH: ${CUBLAS_LIBRARY_PATH}")
+    message("CUPTI_INCLUDE_DIRS: ${CUPTI_INCLUDE_DIRS}")
+    include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS})
+
+    list(APPEND CUDA_NVCC_FLAGS -arch=sm_53 --expt-relaxed-constexpr)
+    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+        list(APPEND CUDA_NVCC_FLAGS -G)
+        message("CUDA_NVCC_FLAGS" ${CUDA_NVCC_FLAGS})
+    endif()
+    set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
+    set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
+    add_compile_definitions(ENABLE_GPU)
 endif()
 
 if(ENABLE_CPU)
@@ -59,111 +124,6 @@ if(ENABLE_MPI)
     add_compile_definitions(ENABLE_MPI)
 endif()
 
-if(ENABLE_GPU)
-    find_package(CUDA REQUIRED)
-    find_package(Threads)
-    if(${CUDA_VERSION} VERSION_LESS ${MS_REQUIRE_CUDA_VERSION})
-        message(FATAL_ERROR "The minimum CUDA version ${MS_REQUIRE_CUDA_VERSION} is required, \
-            but only CUDA ${CUDA_VERSION} found.")
-    endif()
-    enable_language(CUDA)
-    if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "")
-        if(DEFINED ENV{CUDA_HOME} AND NOT $ENV{CUDA_HOME} STREQUAL "")
-            set(CUDA_PATH $ENV{CUDA_HOME})
-        else()
-            set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
-        endif()
-    endif()
-
-    if(DEFINED ENV{CUDNN_HOME} AND NOT $ENV{CUDNN_HOME} STREQUAL "")
-        set(CUDNN_INCLUDE_DIR $ENV{CUDNN_HOME}/include)
-        set(CUDNN_LIBRARY_DIR $ENV{CUDNN_HOME}/lib64)
-        find_path(CUDNN_INCLUDE_PATH cudnn.h HINTS ${CUDNN_INCLUDE_DIR} NO_DEFAULT_PATH)
-        find_library(CUDNN_LIBRARY_PATH "cudnn" HINTS ${CUDNN_LIBRARY_DIR} NO_DEFAULT_PATH)
-        if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
-            message(FATAL_ERROR "Failed to find cudnn header file, please set environment variable CUDNN_HOME to \
-                cudnn installation position.")
-        endif()
-        if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
-            message(FATAL_ERROR "Failed to find cudnn library file, please set environment variable CUDNN_HOME to \
-                cudnn installation position.")
-        endif()
-    else()
-        list(APPEND CMAKE_PREFIX_PATH ${CUDA_TOOLKIT_ROOT_DIR})
-        find_path(CUDNN_INCLUDE_PATH cudnn.h PATH_SUFFIXES cuda/inclulde include cuda)
-        find_library(CUDNN_LIBRARY_PATH "cudnn" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu)
-        if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
-            message(FATAL_ERROR "Failed to find cudnn header file, if cudnn library is not installed, please put \
-                cudnn header file in cuda include path or user include path(eg. /usr/local/cuda/include; \
-                /usr/local/include; /usr/include), if cudnn library is installed in other position, please \
-                set environment variable CUDNN_HOME to cudnn installation position, there should be cudnn.h \
-                in {CUDNN_HOME}/include.")
-        endif()
-        if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
-            message(FATAL_ERROR "Failed to find cudnn library file, if cudnn library is not installed, please put \
-                cudnn library file in cuda library path or user library path(eg. /usr/local/cuda/lib64; \
-                /usr/local/lib64; /usr/lib64; /usr/local/lib; /usr/lib), if cudnn library is installed in other \
-                position, please set environment variable CUDNN_HOME to cudnn installation position, there should \
-                be cudnn library file in {CUDNN_HOME}/lib64.")
-        endif()
-    endif()
-
-    if(NOT CUPTI_INCLUDE_DIRS OR CUPTI_INCLUDE_DIRS STREQUAL "")
-        set(CUPTI_INCLUDE_DIRS ${CUDA_PATH}/extras/CUPTI/include)
-    endif()
-    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:${CUDA_PATH}/lib64)
-    message("CUDA_PATH: ${CUDA_PATH}")
-    message("CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
-    message("CUDNN_INCLUDE_PATH: ${CUDNN_INCLUDE_PATH}")
-    message("CUDNN_LIBRARY_PATH: ${CUDNN_LIBRARY_PATH}")
-    message("CUPTI_INCLUDE_DIRS: ${CUPTI_INCLUDE_DIRS}")
-    include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS})
-
-    file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "plugin/device/gpu/hal/device/*.cc"
-        "plugin/device/gpu/hal/device/*.cu"
-        "plugin/device/gpu/kernel/*.cu"
-    )
-
-    list(REMOVE_ITEM GPU_SRC_LIST "plugin/device/gpu/kernel/cuda_impl/cuda_ops/*.cu")
-
-    list(APPEND CUDA_NVCC_FLAGS -arch=sm_53 --expt-relaxed-constexpr)
-    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
-        list(APPEND CUDA_NVCC_FLAGS -G)
-        message("CUDA_NVCC_FLAGS" ${CUDA_NVCC_FLAGS})
-    endif()
-    list(REMOVE_ITEM GPU_SRC_LIST "plugin/device/gpu/hal/device/mpi/mpi_initializer.cc"
-        "plugin/device/gpu/hal/device/distribution/collective_wrapper.cc"
-        "plugin/device/gpu/hal/device/distribution/mpi_wrapper.cc"
-        "plugin/device/gpu/hal/device/distribution/nccl_wrapper.cc"
-        "plugin/device/gpu/hal/device/trt_loader.cc")
-
-    if(NOT ${TENSORRT_HOME} STREQUAL "")
-        find_path(TENSORRT_HOME_INCLUDE NvInfer.h HINTS ${TENSORRT_HOME}/include)
-        if(TENSORRT_HOME_INCLUDE STREQUAL TENSORRT_HOME_INCLUDE-NOTFOUND)
-            message(FATAL_ERROR "Tensor-RT dir not exist ${TENSORRT_HOME}")
-        endif()
-        message("Enable GPU inference. Tensor-RT include dir: ${TENSORRT_HOME_INCLUDE}")
-        set(ENABLE_GPU_INFER TRUE)
-        add_compile_definitions(ENABLE_GPU_INFER)
-        include_directories(${TENSORRT_HOME_INCLUDE})
-        list(APPEND GPU_SRC_LIST ${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/gpu/hal/device/trt_loader.cc)
-    endif()
-
-    set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
-    if(${CUDA_VERSION} VERSION_LESS 11.0)
-        string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-    else()
-        string(REPLACE "-std=c++17" "-std=c++14" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-    endif()
-    set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
-    cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
-    set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
-    add_compile_definitions(ENABLE_GPU)
-    add_dependencies(gpu_cuda_lib proto_input)
-    add_subdirectory(plugin/device/gpu/kernel/cuda_impl/cuda_ops)
-endif()
-
 ## make protobuf files
 file(GLOB ONNX_PROTO "" ${CMAKE_SOURCE_DIR}/third_party/proto/onnx/onnx.proto)
 message("onnx proto path is :" ${ONNX_PROTO})
@@ -328,22 +288,11 @@ set(BACKEND_SUB_COMP
     runtime/hardware
     runtime/pynative
     runtime/data_queue
-    plugin/device/ascend/hal/device
-    plugin/device/ascend/hal/hardware
-    plugin/device/ascend/hal/hccl_adapter
-    plugin/device/ascend/hal/profiler
-    plugin/device/ascend/kernel
-    plugin/device/ascend/optimizer
     plugin/device/cpu/hal/device
     plugin/device/cpu/hal/hardware
     plugin/device/cpu/hal/profiler
     plugin/device/cpu/kernel
    plugin/device/cpu/optimizer
-    plugin/device/gpu/hal/device
-    plugin/device/gpu/hal/hardware
-    plugin/device/gpu/hal/profiler
-    plugin/device/gpu/kernel
-    plugin/device/gpu/optimizer
     transform/graph_ir
 )
 
@@ -370,14 +319,19 @@ endif()
 set_property(SOURCE ${BACKEND_SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
 add_library(mindspore_backend SHARED ${BACKEND_SUB_OBJECTS_SRC})
+add_library(mindspore_backend_common STATIC ${BACKEND_SUB_OBJECTS_SRC})
+
 if(MODE_ASCEND_ACL)
     add_library(mindspore_backend_static STATIC ${BACKEND_SUB_OBJECTS_SRC})
 endif()
+
 if(CMAKE_SYSTEM_NAME MATCHES "Windows")
     target_link_libraries(mindspore_backend PRIVATE mindspore::pybind11_module)
 endif()
+
 target_link_libraries(mindspore_backend PRIVATE mindspore_core mindspore_common proto_input mindspore::protobuf)
 target_link_libraries(mindspore_backend PRIVATE securec)
+
 if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
     set_target_properties(mindspore_backend PROPERTIES MACOSX_RPATH ON)
     set_target_properties(mindspore_backend PROPERTIES INSTALL_RPATH @loader_path)
@@ -388,17 +342,6 @@ endif()
 if(ENABLE_CPU)
     target_link_libraries(mindspore_backend PRIVATE mindspore::dnnl mindspore::mkldnn nnacl)
 endif()
-if(ENABLE_GPU)
-    message("add gpu lib to mindspore_backend")
-    target_link_libraries(mindspore_backend PRIVATE gpu_cuda_lib cublas cuda_ops
-        ${CUDA_PATH}/lib64/libcurand.so
-        ${CUDNN_LIBRARY_PATH}
-        ${CUDA_PATH}/lib64/libcudart.so
-        ${CUDA_PATH}/lib64/stubs/libcuda.so
-        ${CUDA_PATH}/lib64/libcusolver.so
-        ${CUDA_PATH}/lib64/libcufft.so
-        ${CUDA_PATH}/lib64/libcusparse.so)
-endif()
 
 if(NOT WIN32)
     target_link_libraries(mindspore_backend PRIVATE mindspore::ssl mindspore::crypto)
@@ -421,28 +364,7 @@ elseif(ENABLE_CPU AND NOT WIN32)
                           -Wl,--no-as-needed mindspore::event_core ps_cache)
 endif()
 
-if(ENABLE_D)
-    find_library(GE_RUNNER ge_runner ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(GRAPH graph ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(HCCL hccl ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    target_link_libraries(mindspore_backend PUBLIC ${GE_RUNNER} ${GRAPH} ${HCCL})
-endif()
-
 if(MODE_ASCEND_ALL)
-    MESSAGE("USE DAV LIB PATH: ${ASCEND_PATH}")
-    find_library(ERROR_MANAGER error_manager ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(RUNTIME_LIB runtime ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(TSDCLIENT tsdclient HINTS ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(DATATRANSFER datatransfer HINTS ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(PROFILING msprofiler ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(ACL ascendcl ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(ACL_TDT_CHANNEL acl_tdt_channel ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(PLATFORM platform ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(OPT_FEATURE opt_feature ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(adump_server libadump_server.a ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
-    find_library(OPTILING optiling ${ASCEND_CANN_OPP_PATH} ${ASCEND_TOOLKIT_OPP_PATH})
-    target_link_libraries(mindspore_backend PUBLIC ${RUNTIME_LIB} ${TSDCLIENT} ${DATATRANSFER} ${ERROR_MANAGER}
-        -Wl,--no-as-needed ${OPTILING} ${PLATFORM} ${ACL} ${ACL_TDT_CHANNEL} ${OPT_FEATURE} ${PROFILING})
     target_link_libraries(mindspore PUBLIC -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group)
 elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
     target_link_libraries(mindspore PUBLIC -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece
@@ -473,6 +395,18 @@ else()
 endif()
 set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${MINDSPORE_RPATH})
 
+if(ENABLE_D)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/ascend)
+    add_subdirectory(plugin/device/ascend)
+    target_link_libraries(mindspore_backend PRIVATE mindspore_ascend)
+endif()
+
+if(ENABLE_GPU)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/gpu)
+    add_subdirectory(plugin/device/gpu)
+    target_link_libraries(mindspore_backend PRIVATE mindspore_gpu)
+endif()
+
 set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
 
 if(CMAKE_SYSTEM_NAME MATCHES "Windows")
@@ -526,16 +460,6 @@ if(ENABLE_MINDDATA)
     add_subdirectory(minddata/dataset)
 endif()
 
-if(MODE_ASCEND_ALL)
-    target_link_libraries(_c_expression PRIVATE ${adump_server})
-endif()
-
-if(ENABLE_D)
-    if(ENABLE_MPI)
-        set_target_properties(_ascend_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
-    endif()
-endif()
-
 if(ENABLE_TEST OR ENABLE_TESTCASES)
     include_directories(${CMAKE_BINARY_DIR})
     list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ge/ge_operator_stub.cc)
diff --git a/mindspore/ccsrc/backend/common/optimizer/helper.cc b/mindspore/ccsrc/backend/common/optimizer/helper.cc
index 763434dbb17..6957f2f7696 100644
--- a/mindspore/ccsrc/backend/common/optimizer/helper.cc
+++ b/mindspore/ccsrc/backend/common/optimizer/helper.cc
@@ -734,10 +734,10 @@ AbstractBasePtrList RectifyAbstractFromRegAttr(const PrimitivePtr &primitive,
   MS_EXCEPTION_IF_NULL(ms_context);
   auto device = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
   if (device == kGPUDevice) {
-    if (DynamicShapeConstInputToAttrGPU.find(primitive->name()) != DynamicShapeConstInputToAttrGPU.end()) {
+    if (IsOneOfDynamicShapeConstInputToAttrGPU(primitive->name())) {
       return input_abstract;
     }
-  } else if (DynamicShapeConstInputToAttr.find(primitive->name()) != DynamicShapeConstInputToAttr.end()) {
+  } else if (IsOneOfDynamicShapeConstInputToAttr(primitive->name())) {
     return input_abstract;
   }
   auto convert_input_list = reg.GetConstInputAttrInfo();
diff --git a/mindspore/ccsrc/backend/common/pass/custom_op_reg_info_to_attr.cc b/mindspore/ccsrc/backend/common/pass/custom_op_reg_info_to_attr.cc
index e5748b34213..1de58be3148 100644
--- a/mindspore/ccsrc/backend/common/pass/custom_op_reg_info_to_attr.cc
+++ b/mindspore/ccsrc/backend/common/pass/custom_op_reg_info_to_attr.cc
@@ -133,7 +133,7 @@ const AnfNodePtr CustomOpRegInfoToAttr::Process(const FuncGraphPtr &, const AnfN
   MS_EXCEPTION_IF_NULL(primitive);
   auto func_type = common::AnfAlgo::GetNodeAttr<std::string>(cnode, kAttrFuncType);
   // AKG/AICPU need to process attr, TBE will process later in the json creating phase.
-  if (kCustomTypeAkg.find(func_type) == kCustomTypeAkg.end() || func_type == kCustomTypeAICPU) {
+  if (!IsOneOfCustomAkgType(func_type) || func_type == kCustomTypeAICPU) {
     return nullptr;
   }
   // Early return if current node does not have attr
diff --git a/mindspore/ccsrc/backend/common/somas/somas.h b/mindspore/ccsrc/backend/common/somas/somas.h
index abaaeacac21..3a63647b2c7 100644
--- a/mindspore/ccsrc/backend/common/somas/somas.h
+++ b/mindspore/ccsrc/backend/common/somas/somas.h
@@ -35,6 +35,7 @@
 #include "include/common/utils/anfalgo.h"
 #include "backend/common/session/kernel_graph.h"
 #include "runtime/hardware/device_type.h"
+#include "include/backend/visible.h"
 
 namespace mindspore {
 namespace somas {
@@ -248,7 +249,7 @@ using SomasPtr = std::shared_ptr<Somas>;
 using SomasCreator = std::function<std::shared_ptr<Somas>()>;
 
 // @todo will delete when old runtime remove
-class SomasManager {
+class BACKEND_EXPORT SomasManager {
  public:
   static SomasManager &Instance() {
     static SomasManager instance{};
diff --git a/mindspore/ccsrc/backend/graph_compiler/backend.cc b/mindspore/ccsrc/backend/graph_compiler/backend.cc
index 4a4389de041..a9385f3ac5b 100644
--- a/mindspore/ccsrc/backend/graph_compiler/backend.cc
+++ b/mindspore/ccsrc/backend/graph_compiler/backend.cc
@@ -399,7 +399,7 @@ void ClearInputDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *d
 }
 
 bool OpInBlackList(const session::BackendOpRunInfoPtr &op_run_info) {
-  return kOpCacheBlackList.find(op_run_info->base_op_run_info.op_name) != kOpCacheBlackList.end();
+  return IsOneOfCacheBlackList(op_run_info->base_op_run_info.op_name);
 }
 
 int GetExecutionMode() {
diff --git a/mindspore/ccsrc/backend/graph_compiler/graph_partition.cc b/mindspore/ccsrc/backend/graph_compiler/graph_partition.cc
index 8b9c78dfad2..202ae5048b3 100644
--- a/mindspore/ccsrc/backend/graph_compiler/graph_partition.cc
+++ b/mindspore/ccsrc/backend/graph_compiler/graph_partition.cc
@@ -28,9 +28,6 @@
 #include "utils/ms_context.h"
 #include "ps/ps_context.h"
 #include "utils/anf_utils.h"
-#ifdef ENABLE_D
-#include "include/transform/graph_ir/utils.h"
-#endif
 namespace mindspore {
 const char kMsConvert[] = "ms";
 const char kMsVm[] = "vm";
diff --git a/mindspore/ccsrc/common/graph_kernel/adapter/callback_impl.cc b/mindspore/ccsrc/common/graph_kernel/adapter/callback_impl.cc
index 5ecf1755939..d0461729db5 100644
--- a/mindspore/ccsrc/common/graph_kernel/adapter/callback_impl.cc
+++ b/mindspore/ccsrc/common/graph_kernel/adapter/callback_impl.cc
@@ -194,12 +194,17 @@ void CallbackImpl::ResetKernelInfo(const AnfNodePtr &node) {
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
   if (GetTargetFromContext() == kAscendDevice) {
+    cnode->set_kernel_info(std::make_shared<device::KernelInfo>());
     auto kernel_info_setter = GraphKernelInfoManager::Instance().GetGraphKernelInfo(kAscendDevice);
+    MS_EXCEPTION_IF_NULL(kernel_info_setter);
     kernel_info_setter->SetKernelInfo(cnode, KernelType::UNKNOWN_KERNEL_TYPE);
   } else if (GetTargetFromContext() == kGPUDevice) {
+    cnode->set_kernel_info(std::make_shared<device::KernelInfo>());
     auto kernel_info_setter = GraphKernelInfoManager::Instance().GetGraphKernelInfo(kGPUDevice);
+    MS_EXCEPTION_IF_NULL(kernel_info_setter);
     kernel_info_setter->SetKernelInfo(cnode, KernelType::UNKNOWN_KERNEL_TYPE);
   } else {
+    cnode->set_kernel_info(std::make_shared<device::KernelInfo>());
     auto kernel_info_setter = GraphKernelInfoManager::Instance().GetGraphKernelInfo(kCPUDevice);
     if (kernel_info_setter != nullptr) {
       kernel_info_setter->SetKernelInfo(cnode, KernelType::UNKNOWN_KERNEL_TYPE);
diff --git a/mindspore/ccsrc/common/graph_kernel/model/op_register.h b/mindspore/ccsrc/common/graph_kernel/model/op_register.h
index fd2be0880e4..d3f5ea4d585 100644
--- a/mindspore/ccsrc/common/graph_kernel/model/op_register.h
+++ b/mindspore/ccsrc/common/graph_kernel/model/op_register.h
@@ -21,10 +21,11 @@
 
 #include "utils/hash_map.h"
 #include "common/graph_kernel/model/op_node.h"
+#include "include/backend/visible.h"
 
 namespace mindspore::graphkernel::inner {
 using CreatorFunc = std::function<PrimOpPtr(const std::string &)>;
-class OpRegistry {
+class BACKEND_EXPORT OpRegistry {
  public:
   static OpRegistry &Instance() {
     static OpRegistry instance{};
diff --git a/mindspore/ccsrc/cxx_api/CMakeLists.txt b/mindspore/ccsrc/cxx_api/CMakeLists.txt
index 8641d2bedcd..a3dabc68549 100644
--- a/mindspore/ccsrc/cxx_api/CMakeLists.txt
+++ b/mindspore/ccsrc/cxx_api/CMakeLists.txt
@@ -62,7 +62,13 @@ if(ENABLE_D)
         "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc"
         "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc"
         "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc"
-        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc")
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc"
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/pattern_engine.cc"
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/helper.cc"
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/node_pass.cc"
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/visit.cc"
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/kernel/kernel_build_info.cc"
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/kernel_info.cc")
 endif()
 
 if(NOT ENABLE_TESTCASES AND NOT BUILD_LITE)
@@ -70,11 +76,22 @@ if(NOT ENABLE_TESTCASES AND NOT BUILD_LITE)
     set(MSLIB_SRC ${MSLIB_SRC} ${CMAKE_SOURCE_DIR}/mindspore/core/utils/status.cc)
 endif()
 
+if(ENABLE_D OR ENABLE_ACL)
+    list(APPEND MSLIB_SRC
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc")
+endif()
+
+if(ENABLE_GPU)
+    list(APPEND MSLIB_SRC "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/gpu/hal/device/cuda_driver.cc")
+endif()
+
 if(BUILD_LITE)
     list(APPEND MSLIB_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../../mindspore/ccsrc/utils/config_manager.cc")
     file(GLOB_RECURSE ACL_REMOVE_SRC ${CMAKE_CURRENT_SOURCE_DIR}
         "model/acl/acl_vm/*.cc"
         )
+    list(REMOVE_ITEM MSLIB_SRC
+        "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc")
     list(REMOVE_ITEM MSLIB_SRC "${CMAKE_CURRENT_SOURCE_DIR}/akg_kernel_register.cc"
         "${CMAKE_CURRENT_SOURCE_DIR}/model/acl/acl_model_multi.cc"
         "${CMAKE_CURRENT_SOURCE_DIR}/model/acl/acl_model.cc"
@@ -157,7 +174,8 @@ if(ENABLE_D)
 endif()
 
 if(ENABLE_GPU)
-    target_link_libraries(mindspore_shared_lib PRIVATE gpu_cuda_lib cublas cuda_ops
+    target_link_libraries(mindspore_shared_lib PRIVATE cuda_ops
+        ${CUBLAS_LIBRARY_PATH}
         ${CUDA_PATH}/lib64/libcurand.so
         ${CUDNN_LIBRARY_PATH}
         ${CUDA_PATH}/lib64/libcudart.so
diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt
index 7052db5798d..14ed40fa420 100644
--- a/mindspore/ccsrc/debug/CMakeLists.txt
+++ b/mindspore/ccsrc/debug/CMakeLists.txt
@@ -15,9 +15,6 @@ set(_OFFLINE_SRC_LIST
 
 if(ENABLE_DUMP_IR)
     file(GLOB_RECURSE _RDR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "rdr/*.cc")
-    if(NOT ENABLE_D)
-        list(REMOVE_ITEM _RDR_SRC_LIST "rdr/task_debug_info_recorder.cc")
-    endif()
 endif()
 
 if("${ENABLE_HIDDEN}" STREQUAL "OFF")
diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_or_list_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_or_list_eliminate.h
index 42844a747b2..b794b147a6b 100644
--- a/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_or_list_eliminate.h
+++ b/mindspore/ccsrc/frontend/optimizer/irpass/item_tuple_or_list_eliminate.h
@@ -21,6 +21,7 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include <map>
 
 #include "frontend/optimizer/optimizer_caller.h"
 #include "frontend/optimizer/anf_visitor.h"
@@ -40,6 +41,8 @@ namespace irpass {
 // {prim::kPrimListGetItem, L, N}
 // {prim::kPrimTupleSetItem, T, N, Z}
 // {prim::kPrimListSetItem, L, N, Z}
+const std::map<std::string, size_t> kSliceAttrToIndex = {{kSliceStart, 1}, {kSliceStop, 2}, {kSliceStep, 3}};
+
 class TupleListConvertItemIndexToPositive : public AnfVisitor {
  public:
   AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override {
diff --git a/mindspore/ccsrc/include/common/utils/utils.h b/mindspore/ccsrc/include/common/utils/utils.h
index 625ecca0e5f..59906a1f542 100644
--- a/mindspore/ccsrc/include/common/utils/utils.h
+++ b/mindspore/ccsrc/include/common/utils/utils.h
@@ -29,6 +29,7 @@
 
 #include "utils/log_adapter.h"
 #include "ir/dtype/type.h"
+#include "include/common/visible.h"
 
 namespace mindspore {
 // op name. Op which not exists in operator/ops.h, so define it's name here
@@ -661,7 +662,6 @@ constexpr auto kCustomTypePyfunc = "pyfunc";
 constexpr auto kCustomTypeTbe = "tbe";
 constexpr auto kCustomTypeAICPU = "aicpu";
 constexpr auto kCustomTypeHybrid = "hybrid";
-const std::set<std::string> kCustomTypeAkg = {"ir_builder", "tvm_compute", "hybrid"};
 
 // primal attr key name
 constexpr auto kPrimalAttrForwardNodeName = "forward_node_name";
@@ -791,145 +791,23 @@ constexpr auto kOpFormat_FRACTAL_Z_3D = "FRACTAL_Z_3D";
 constexpr auto kOpFormat_FRACTAL_ZN_LSTM = "FRACTAL_ZN_LSTM";
 constexpr auto kOpFormat_FRACTAL_ZN_RNN = "FRACTAL_ZN_RNN";
 constexpr auto kOpFormat_ND_RNN_BIAS = "ND_RNN_BIAS";
-
-const std::set<std::string> kOpFormatList = {kOpFormat_DEFAULT,
-                                             kOpFormat_NC1KHKWHWC0,
-                                             kOpFormat_ND,
-                                             kOpFormat_NCHW,
-                                             kOpFormat_NHWC,
-                                             kOpFormat_HWCN,
-                                             kOpFormat_NC1HWC0,
-                                             kOpFormat_FRAC_Z,
-                                             kOpFormat_C1HWNCoC0,
-                                             kOpFormat_FRAC_NZ,
-                                             kOpFormat_NC1HWC0_C04,
-                                             kOpFormat_FRACTAL_Z_C04,
-                                             kOpFormat_NDHWC,
-                                             kOpFormat_FRACTAL_ZN_LSTM,
-                                             kOpFormat_FRACTAL_ZN_RNN,
-                                             kOpFormat_ND_RNN_BIAS,
-                                             kOpFormat_NDC1HWC0,
-                                             kOpFormat_NCDHW,
-                                             kOpFormat_FRACTAL_Z_3D,
-                                             kOpFormat_DHWNC,
-                                             kOpFormat_DHWCN};
-
 constexpr auto kSliceStart = "start";
 constexpr auto kSliceStop = "stop";
 constexpr auto kSliceStep = "step";
-const std::map<std::string, size_t> kSliceAttrToIndex = {{kSliceStart, 1}, {kSliceStop, 2}, {kSliceStep, 3}};
 
-const std::set<std::string> kDefaultCompatibleFormat = {kOpFormat_ND, kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_HWCN,
-                                                        kOpFormat_NCDHW};
-
-const std::set<std::string> kOptOperatorSet = {kMomentumOpName,
-                                               kApplyMomentumOpName,
-                                               kApplyAdadeltaOpName,
-                                               kApplyAdagradOpName,
-                                               kApplyAdagradDAName,
-                                               kApplyAdamOpName,
-                                               kApplyAdaMaxOpName,
-                                               kApplyAddSignOpName,
-                                               kApplyCenteredRMSPOpName,
-                                               kApplyFtrlOpName,
-                                               kApplyFtrlV2OpName,
-                                               kApplyGradientDescentOpName,
-                                               kApplyPowerSignOpName,
-                                               kApplyProximalAdagradOpName,
-                                               kApplyProximalGradientDescentOpName,
-                                               kApplyRMSPropOpName,
-                                               kAdamApplyOneWithDecayOpName,
-                                               kAdamApplyOneWithDecayAssignOpName,
-                                               kFusedAdamWeightDecayName,
-                                               kAdamWeightDecayName,
-                                               kFusedCastAdamWeightDecayName,
-                                               kFusedAdamName,
-                                               kFusedAdaFactorName,
-                                               kFusedAdaFactorWithGlobalNormName,
-                                               kFusedSparseAdamName,
-                                               kFusedMulApplyMomentumOpName,
-                                               kFusedWeightScaleApplyMomentum,
-                                               kFusedScaleApplyMomentum,
-                                               kApplyCenteredRMSPropOpName,
-                                               kFusedSparseFtrlName,
-                                               kFusedSparseProximalAdagradName,
-                                               kFusedSparseLazyAdamName,
-                                               kSparseApplyFtrlName,
-                                               kSparseApplyFtrlV2Name,
-                                               kSGDName,
-                                               kLARSUpdateName,
-                                               kCombineMomentumWeightOpName,
-                                               kCombineMomentumOpName,
-                                               kScatterAddOpName,
-                                               kScatterUpdateOpName,
-                                               kSparseApplyProximalAdagradOpName};
-
-const std::set<std::string> kNodeWithSeedOperators = {kGammaOpName, kPoissonOpName, kStandardLaplaceOpName,
-                                                      kStandardNormalOpName, kUniformIntOpName, kUniformRealOpName};
-const std::set<std::string> kPosteriorOperatorSet = {kPullOpName};
-
-const std::set<std::string> kOpCacheBlackList = {kUniformCandidateSamplerOpName, kInitDatasetQueueOpName,
-                                                 kGetNextOpName};
-
-const std::set<std::string> kOpNotSupportMultiThreadExecList = {kAvgPoolOpName, kAvgPoolGradOpName, kMaxPoolOpName,
-                                                                kBatchNorm, kBatchNormGradOpName};
-
-const std::set<std::string> kHWSpecialFormatSet = {
-  kOpFormat_FRACTAL_Z_3D,   kOpFormat_NC1KHKWHWC0, kOpFormat_NC1HWC0,       kOpFormat_FRAC_NZ,
-  kOpFormat_C1HWNCoC0,      kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_FRACTAL_ZN_LSTM,
-  kOpFormat_FRACTAL_ZN_RNN, kOpFormat_NDC1HWC0,    kOpFormat_FRAC_Z};
-
-const std::set<TypeId> kFloatDataTypeSet = {kNumberTypeFloat16, kNumberTypeFloat32};
-
-const std::set<std::string> kComputeDepend = {kUniqueOpName,
-                                              kUniqueConsecutiveOpName,
-                                              kComputeAccidentalHitsOpName,
-                                              kSubAndFilterOpName,
-                                              kPadAndShiftOpName,
-                                              kCTCGreedyDecoderOpName,
-                                              kMaskedSelectOpName,
-                                              kDynamicStitchOpName,
-                                              kGetNextOpName,
-                                              kListDiffOpName,
-                                              kNonMaxSuppressionV3OpName,
-                                              kNonMaxSuppressionWithOverlapsOpName,
-                                              kCoalesceOpName,
-                                              kTruncatedNormal,
-                                              kNonDeterministicInts,
-                                              kFractionalAvgPoolGradOpName,
-                                              kDenseToDenseSetOperation,
-                                              kSegmentMaxOpName,
-                                              kCSRSparseMatrixToSparseTensorOpName,
-                                              kSegmentMinOpName,
-                                              kLuUnpackOpName,
-                                              kSegmentSumOpName,
-                                              kResizeBicubicOpName,
-                                              kResizeAreaOpName,
-                                              kSegmentMeanOpName,
-                                              kSegmentProdOpName,
-                                              kNonZeroOpName,
-                                              kSparseSparseMinimumOpName,
-                                              kRpcRecvOpName,
-                                              kAdaptiveMaxPool3DGradOpName};
-
-const std::set<std::string> k3DFormatSet = {kOpFormat_NCDHW, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D,
-                                            kOpFormat_NDHWC, kOpFormat_DHWCN, kOpFormat_DHWNC};
-
-const std::set<std::string> kNoPaddingFormatSet = {kOpFormat_ChannelLast, kOpFormat_FRAC_NZ, kOpFormat_FRACTAL_ZN_RNN,
-                                                   kOpFormat_ND_RNN_BIAS};
-
-const std::set<std::string> DynamicShapeConstInputToAttr = {
-  kCastOpName,      kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName,
-  kReduceMaxOpName, kReduceAllOpName,  kReduceAnyOpName,       kConcatOpName,    kTransposeOpName};
-
-const std::set<std::string> DynamicShapeConstInputToAttrCPU = {
-  kCastOpName,      kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName,
-  kReduceAllOpName, kReduceAnyOpName,  kConcatOpName,          kReduceSumOpName, kTransposeOpName};
-
-const std::set<std::string> DynamicShapeConstInputToAttrGPU = {
-  kCastOpName,      kExpandDimsOpName, kReshapeOpName,    kEmbeddingLookupOpName, kTransposeOpName,
-  kReduceSumOpName, kReduceMinOpName,  kReduceMeanOpName, kReduceMaxOpName,       kReduceAllOpName,
-  kReduceAnyOpName, kConcatOpName,     kScatterNdOpName,  kGatherV2OpName,        kAvgPool3DGradOpName};
+COMMON_EXPORT bool IsOneOfCustomAkgType(const std::string &name);
+COMMON_EXPORT bool IsOneOfOperator(const std::string &name);
+COMMON_EXPORT bool IsOneOfPosteriorOperator(const std::string &name);
+COMMON_EXPORT bool IsOneOfCacheBlackList(const std::string &name);
+COMMON_EXPORT bool IsOneOfNotSupportMultiThreadExec(const std::string &name);
+COMMON_EXPORT bool IsOneOf3DFormat(const std::string &format);
+COMMON_EXPORT bool IsOneOfNoPaddingFormat(const std::string &format);
+COMMON_EXPORT bool IsOneOfDynamicShapeConstInputToAttr(const std::string &name);
+COMMON_EXPORT bool IsOneOfDynamicShapeConstInputToAttrCPU(const std::string &name);
+COMMON_EXPORT bool IsOneOfDynamicShapeConstInputToAttrGPU(const std::string &name);
+COMMON_EXPORT bool IsOneOfComputeDepend(const std::string &name);
+COMMON_EXPORT bool IsOneOfHWSpecialFormat(const std::string &format);
+COMMON_EXPORT bool IsOneOfFormat(const std::string &format);
 
 // The map between kernel's output and input ref relationship.
 // Key is the output index while the value is input index which will be used as the reference of output.
diff --git a/mindspore/ccsrc/kernel/akg/akg_kernel_build_manager.h b/mindspore/ccsrc/kernel/akg/akg_kernel_build_manager.h
index cdfe5983b96..61374fbdb46 100644
--- a/mindspore/ccsrc/kernel/akg/akg_kernel_build_manager.h
+++ b/mindspore/ccsrc/kernel/akg/akg_kernel_build_manager.h
@@ -21,12 +21,13 @@
 #include <memory>
 #include <string>
 #include <utility>
+#include "include/backend/visible.h"
 
 namespace mindspore {
 namespace kernel {
 using AkgKernelBuildCreator = std::function<std::shared_ptr<AkgKernelBuilder>()>;
 
-class AkgKernelBuildManager {
+class BACKEND_EXPORT AkgKernelBuildManager {
  public:
   static AkgKernelBuildManager &Instance();
   void Register(const std::string &device_type, AkgKernelBuildCreator &&creator);
diff --git a/mindspore/ccsrc/kernel/graph_kernel_info.h b/mindspore/ccsrc/kernel/graph_kernel_info.h
index fe721fa3f63..7710d5aa5e0 100644
--- a/mindspore/ccsrc/kernel/graph_kernel_info.h
+++ b/mindspore/ccsrc/kernel/graph_kernel_info.h
@@ -25,6 +25,7 @@
 #include "ir/dtype.h"
 #include "ir/kernel_info_dev.h"
 #include "kernel/kernel.h"
+#include "include/backend/visible.h"
 namespace mindspore {
 class GraphKernelInfo {
  public:
@@ -35,7 +36,7 @@ class GraphKernelInfo {
 
 using GraphKernelInfoCreator = std::function<std::shared_ptr<GraphKernelInfo>()>;
 
-class GraphKernelInfoManager {
+class BACKEND_EXPORT GraphKernelInfoManager {
  public:
   static GraphKernelInfoManager &Instance() {
     static GraphKernelInfoManager instance{};
@@ -52,6 +53,7 @@ class GraphKernelInfoManager {
       MS_EXCEPTION_IF_NULL(iter->second);
       return (iter->second)();
     }
+    MS_LOG(WARNING) << "Can not get a graph kernel info ptr on device: " << device_type;
     return nullptr;
   }
diff --git a/mindspore/ccsrc/kernel/oplib/oplib.cc b/mindspore/ccsrc/kernel/oplib/oplib.cc
index 7ea5a693087..80b57723ff8 100644
--- a/mindspore/ccsrc/kernel/oplib/oplib.cc
+++ b/mindspore/ccsrc/kernel/oplib/oplib.cc
@@ -67,7 +67,6 @@ constexpr auto kFormat = "format";
 constexpr auto kNeedCompile = "need_compile";
 constexpr auto kShape = "shape";
 constexpr auto kProcessor = "processor";
-std::multimap<std::string, std::shared_ptr<OpInfo>> OpLib::op_info_;
 
 static std::string ImplTypeToStr(OpImplyType impl_type) {
   switch (impl_type) {
diff --git a/mindspore/ccsrc/kernel/oplib/oplib.h b/mindspore/ccsrc/kernel/oplib/oplib.h
index bc96f589a06..9df44f5473d 100644
--- a/mindspore/ccsrc/kernel/oplib/oplib.h
+++ b/mindspore/ccsrc/kernel/oplib/oplib.h
@@ -27,16 +27,16 @@
 
 namespace mindspore {
 namespace kernel {
-class OpLib {
+class BACKEND_EXPORT OpLib {
  public:
   OpLib() = default;
   virtual ~OpLib() = default;
-  BACKEND_EXPORT static bool RegOp(const std::string &json_string, const std::string &impl_path);
+  static bool RegOp(const std::string &json_string, const std::string &impl_path);
   static std::shared_ptr<OpInfo> FindOp(const std::string &op_name, OpImplyType imply_type,
                                         bool is_dynamic_shape = false);
 
  protected:
-  static std::multimap<std::string, std::shared_ptr<OpInfo>> op_info_;
+  inline static std::multimap<std::string, std::shared_ptr<OpInfo>> op_info_ = {};
 
 private:
   static bool RegOpFromLocalInfo();
diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc
index b9e498cc0a2..6318b50888b 100644
--- a/mindspore/ccsrc/pipeline/jit/init.cc
+++ b/mindspore/ccsrc/pipeline/jit/init.cc
@@ -35,7 +35,7 @@
 #ifdef ENABLE_GPU_COLLECTIVE
 #include "plugin/device/gpu/hal/device/distribution/collective_init.h"
 #else
-#include "plugin/device/gpu/hal/device/distribution/collective_fake_init.h"
+#include "runtime/collective/collective_fake_init.h"
 #endif
 #if ((defined ENABLE_CPU) && (!defined _WIN32))
 #include "ps/util.h"
diff --git a/mindspore/ccsrc/plugin/device/ascend/CMakeLists.txt b/mindspore/ccsrc/plugin/device/ascend/CMakeLists.txt
new file mode 100644
index 00000000000..e4c23cd344a
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/ascend/CMakeLists.txt
@@ -0,0 +1,85 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_BINARY_DIR})
+
+if(ENABLE_D OR ENABLE_ACL)
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/lib64)
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/lib64)
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/lib64)
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
+    set(MINDSPORE_RPATH
+        ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
+    set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
+endif()
+
+########### mindspore_ascend.so #####
+set(ASCEND_SUB_COMP
+    hal/device
+    hal/hardware
+    hal/hccl_adapter
+    hal/profiler
+    kernel
+    optimizer
+    )
+
+foreach(a_comp ${ASCEND_SUB_COMP})
+    add_subdirectory(${a_comp})
+    string(REPLACE "/" "_" sub ${a_comp})
+    if(TARGET _mindspore_plugin_device_ascend_${sub}_obj)
+        list(APPEND ASCEND_SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_plugin_device_ascend_${sub}_obj>)
+        add_dependencies(_mindspore_plugin_device_ascend_${sub}_obj proto_input)
+    endif()
+endforeach()
+
+add_library(mindspore_ascend SHARED ${ASCEND_SUB_OBJECTS_SRC})
+target_link_libraries(mindspore_ascend PUBLIC mindspore_backend_common)
+target_link_libraries(mindspore_ascend PRIVATE mindspore_core mindspore_common proto_input mindspore::protobuf)
+target_link_libraries(mindspore_ascend PRIVATE securec)
+
+set_target_properties(mindspore_ascend PROPERTIES INSTALL_RPATH $ORIGIN)
+target_link_libraries(mindspore_ascend PRIVATE mindspore::dnnl mindspore::mkldnn mindspore::ssl
+        mindspore::crypto nnacl)
+
+if(ENABLE_DEBUGGER)
+    # debugger: link grpc
+    if(ENABLE_D)
+        target_link_libraries(mindspore_ascend PRIVATE -Wl,--no-as-needed mindspore::grpc++)
+    endif()
+endif()
+
+if(ENABLE_D)
+    find_library(GE_RUNNER ge_runner ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(GRAPH graph ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(HCCL hccl ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    target_link_libraries(mindspore_ascend PUBLIC ${GE_RUNNER} ${GRAPH} ${HCCL})
+    target_link_libraries(mindspore_ascend PRIVATE mindspore::event mindspore::event_pthreads
+        mindspore::event_openssl -Wl,--no-as-needed mindspore::event_core ps_cache)
+endif()
+
+if(MODE_ASCEND_ALL)
+    MESSAGE("USE DAV LIB PATH: ${ASCEND_PATH}")
+    find_library(ERROR_MANAGER error_manager ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(RUNTIME_LIB runtime ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(TSDCLIENT tsdclient HINTS ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(DATATRANSFER datatransfer HINTS ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(PROFILING msprofiler ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(ACL ascendcl ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(ACL_TDT_CHANNEL acl_tdt_channel ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(PLATFORM platform ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(OPT_FEATURE opt_feature ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(adump_server libadump_server.a ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
+    find_library(OPTILING optiling ${ASCEND_CANN_OPP_PATH} ${ASCEND_TOOLKIT_OPP_PATH})
+
+    target_link_libraries(mindspore_ascend PUBLIC ${RUNTIME_LIB} ${TSDCLIENT} ${DATATRANSFER} ${ERROR_MANAGER}
+        -Wl,--no-as-needed ${OPTILING} ${PLATFORM} ${ACL} ${ACL_TDT_CHANNEL} ${OPT_FEATURE} ${PROFILING})
+    target_link_libraries(mindspore_ascend PRIVATE ${adump_server})
+endif()
+
+if(ENABLE_D)
+    if(ENABLE_MPI)
+        set_target_properties(_ascend_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
+    endif()
+endif()
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kernel/aicpu/aicpu_ops)
+add_subdirectory(kernel/aicpu/aicpu_ops)
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_data_queue.h b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_data_queue.h
index e241d4ee458..3724265f9f8 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_data_queue.h
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_data_queue.h
@@ -24,10 +24,11 @@
 #include "runtime/hardware/device_context_manager.h"
 #include "runtime/data_queue/data_queue.h"
 #include "runtime/rt.h"
+#include "include/backend/visible.h"
 
 namespace mindspore {
 namespace device {
-class AscendDataQueueDynamic : public DataQueue {
+class BACKEND_EXPORT AscendDataQueueDynamic : public DataQueue {
 public:
   explicit AscendDataQueueDynamic(const size_t capacity);
   virtual ~AscendDataQueueDynamic() = default;
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc
index 706c19b2b83..abea399fa1a 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/kernel_select_ascend.cc
@@ -83,8 +83,7 @@ string GetPriorityMatchFormat(const CNodePtr &cnode) {
   size_t input_num = common::AnfAlgo::GetInputTensorNum(cnode);
   for (size_t index = 0; index < input_num; ++index) {
     auto pre_output_format = AnfAlgo::GetPrevNodeOutputFormat(cnode, index);
-    if (AnfAlgo::IsFeatureMapInput(cnode, index) &&
-        kHWSpecialFormatSet.find(pre_output_format) != kHWSpecialFormatSet.end()) {
+    if (AnfAlgo::IsFeatureMapInput(cnode, index) && IsOneOfHWSpecialFormat(pre_output_format)) {
       priority_matched_format = !is_init ? pre_output_format : priority_matched_format;
       is_init = true;
     }
@@ -494,7 +493,7 @@ KernelSelectStatus SelectCustomKernelInfo(const CNodePtr &kernel_node, KernelTyp
   auto func_type = common::AnfAlgo::GetNodeAttr<std::string>(kernel_node, kAttrFuncType);
   if (func_type == kCustomTypeTbe) {
     *kernel_type = KernelType::TBE_KERNEL;
-  } else if (kCustomTypeAkg.find(func_type) != kCustomTypeAkg.end()) {
+  } else if (IsOneOfCustomAkgType(func_type)) {
     *kernel_type = KernelType::AKG_KERNEL;
   } else if (func_type == kCustomTypeAICPU) {
     *kernel_type = KernelType::AICPU_KERNEL;
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_manager.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_manager.cc
index de2f2ab4baa..12a35a180ff 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_manager.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_manager.cc
@@ -27,6 +27,7 @@
 #include "runtime/base.h"
 #include <nlohmann/json.hpp>
 #include "plugin/device/ascend/hal/device/profiling/profiling_utils.h"
+#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
 
 using mindspore::device::ascend::ProfilingUtils;
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc
index 68f62412116..94363853e43 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc
@@ -20,6 +20,7 @@
 #include "plugin/device/ascend/kernel/ascend_kernel_mod.h"
 #include "include/common/utils/utils.h"
 #include "backend/common/session/kernel_graph.h"
+#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
 
 namespace mindspore {
 namespace device {
diff --git a/mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ps/ascend_ps_cache.cc
similarity index 99%
rename from mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.cc
rename to mindspore/ccsrc/plugin/device/ascend/hal/device/ps/ascend_ps_cache.cc
index 9fa4b5c18ca..9117a8b307d 100644
--- a/mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ps/ascend_ps_cache.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "ps/ps_cache/ascend/ascend_ps_cache.h"
+#include "plugin/device/ascend/hal/device/ps/ascend_ps_cache.h"
 #include <google/protobuf/text_format.h>
 #include <string>
 #include <vector>
diff --git a/mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.h b/mindspore/ccsrc/plugin/device/ascend/hal/device/ps/ascend_ps_cache.h
similarity index 92%
rename from mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.h
rename to mindspore/ccsrc/plugin/device/ascend/hal/device/ps/ascend_ps_cache.h
index 9762dd9cc86..19de1157679 100644
--- a/mindspore/ccsrc/ps/ps_cache/ascend/ascend_ps_cache.h
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ps/ascend_ps_cache.h
@@ -14,8 +14,8 @@
  * limitations under the License.
 */
-#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_ASCEND_ASCEND_PS_CACHE_H_
-#define MINDSPORE_CCSRC_PS_PS_CACHE_ASCEND_ASCEND_PS_CACHE_H_
+#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
+#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
 
 #include <memory>
 #include <vector>
@@ -72,4 +72,4 @@ class AscendPsCache : public PsCacheBasic {
 }  // namespace ascend
 }  // namespace ps
 }  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_PS_PS_CACHE_ASCEND_ASCEND_PS_CACHE_H_
+#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
diff --git a/mindspore/ccsrc/debug/rdr/task_debug_info_recorder.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.cc
similarity index 95%
rename from mindspore/ccsrc/debug/rdr/task_debug_info_recorder.cc
rename to mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.cc
index 54b4decd5c9..f8641b7de9c 100644
--- a/mindspore/ccsrc/debug/rdr/task_debug_info_recorder.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.cc
@@ -13,7 +13,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#include "debug/rdr/task_debug_info_recorder.h"
+#include "plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.h"
 #include <utility>
 #include "plugin/device/ascend/hal/device/tasksink/task_generator.h"
 #include "include/common/debug/rdr/recorder_manager.h"
diff --git a/mindspore/ccsrc/debug/rdr/task_debug_info_recorder.h b/mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.h
similarity index 85%
rename from mindspore/ccsrc/debug/rdr/task_debug_info_recorder.h
rename to mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.h
index be538823885..e7cf5f0eaf4 100644
--- a/mindspore/ccsrc/debug/rdr/task_debug_info_recorder.h
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.h
@@ -13,8 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#ifndef MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
-#define MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
+#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_TASKSINK_TASK_DEBUG_INFO_RECORDER_H_
+#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_TASKSINK_TASK_DEBUG_INFO_RECORDER_H_
 #include <memory>
 #include <string>
 #include <vector>
@@ -49,4 +49,4 @@ bool RecordTaskDebugInfo(SubModuleId module, const std::string &name,
                          const std::vector<TaskDebugInfoPtr> &task_debug_info_list);
 }  // namespace RDR
 }  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
+#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_TASKSINK_TASK_DEBUG_INFO_RECORDER_H_
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_generator.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_generator.cc
index 946a2311dd6..dd30bebd991 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_generator.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/tasksink/task_generator.cc
@@ -28,7 +28,7 @@
 #include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
 #endif
 #ifdef ENABLE_DUMP_IR
-#include "debug/rdr/task_debug_info_recorder.h"
+#include "plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.h"
 #endif
 #include "mindspore/core/utils/file_utils.h"
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_utils.h b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_utils.h
index 3a15351592c..65ebba5fa61 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_utils.h
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_utils.h
@@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_RUNTIME_HARDWARE_ASCEND_ASCEND_UTILS_H_
 
 #include <string>
+#include <vector>
 
 #include "plugin/device/ascend/hal/hardware/ascend_device_context.h"
 #include "backend/common/session/kernel_graph.h"
diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/memory_profiling.h b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/memory_profiling.h
index 1aa2d2cbad5..a2986a62d73 100644
--- a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/memory_profiling.h
+++ b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/memory_profiling.h
@@ -23,6 +23,7 @@
 #include <string>
 #include <memory>
 #include <mutex>
 #include "utils/ms_context.h"
+#include "include/backend/visible.h"
 
 namespace mindspore {
 namespace profiler {
@@ -107,8 +108,8 @@ class MemoryProfiling {
     return instance;
   }
 
-  std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id);
-  std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id) const;
+  BACKEND_EXPORT std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id);
+  BACKEND_EXPORT std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id) const;
   void SetDeviceMemSize(uint64_t size) { device_mem_size_ = size; }
   bool MemoryToPB();
   void SaveMemoryProfiling();
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_kernel_build.cc
index 9f01e6e4677..20e07e833e2 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_kernel_build.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_kernel_build.cc
@@ -439,7 +439,7 @@ void CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
 
   UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
   auto op_name = common::AnfAlgo::GetCNodeName(anf_node);
-  if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
+  if (IsOneOfComputeDepend(op_name)) {
     shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
   }
   ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset, shape_type);
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/dynamic_aicpu_kernel_mod.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/dynamic_aicpu_kernel_mod.cc
index dd9d94f3611..82d96ed7fe5 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/dynamic_aicpu_kernel_mod.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/dynamic_aicpu_kernel_mod.cc
@@ -39,7 +39,7 @@ DynamicAicpuOpKernelMod::DynamicAicpuOpKernelMod(const AnfNodePtr &anf_node_ptr)
   auto cnode = anf_node_ptr->cast<CNodePtr>();
   if (cnode != nullptr) {
     auto op_name = common::AnfAlgo::GetCNodeName(cnode);
-    if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
+    if (IsOneOfComputeDepend(op_name)) {
       unknow_type_ = device::ascend::UnknowShapeOpType::DEPEND_COMPUTE;
     }
   }
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/ascend_kernel_mod.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/ascend_kernel_mod.cc
index 5682bd16d5a..72b52cd74a5 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/ascend_kernel_mod.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/ascend_kernel_mod.cc
@@ -47,7 +47,7 @@ bool AscendKernelMod::IsNeedRetrieveOutputShape() {
   MS_EXCEPTION_IF_NULL(cnode);
 
   auto op_name = common::AnfAlgo::GetCNodeName(cnode);
-  if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
+  if (IsOneOfComputeDepend(op_name)) {
     is_need_retrieve_output_shape_ = true;
   }
   return is_need_retrieve_output_shape_;
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_json/single_tbe_json_creator.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_json/single_tbe_json_creator.cc
index 693e008ba0e..f2c249d8add 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_json/single_tbe_json_creator.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_json/single_tbe_json_creator.cc
@@ -166,8 +166,7 @@ void SingleTbeJsonCreator::GenInputDescJson(const AnfNodePtr &anf_node, size_t r
   auto def_format = TbeJsonUtils::IsNeedChangeDefaultFormat(anf_node) ? kOpFormat_NCDHW : kOpFormat_NCHW;
   auto format = AnfAlgo::GetInputFormat(anf_node, real_input_index);
   format = TbeAdapter::FormatPass(format, ori_shape.size());
-  format =
-    (def_format == kOpFormat_NCDHW && k3DFormatSet.find(format) == k3DFormatSet.end()) ? kOpFormat_NCDHW : format;
+  format = (def_format == kOpFormat_NCDHW && !IsOneOf3DFormat(format)) ? kOpFormat_NCDHW : format;
   auto d_type = AnfAlgo::GetInputDeviceDataType(anf_node, real_input_index);
   (*input_desc)[kJDtype] = tbe::TypeIdToString(d_type);
   (*input_desc)[kJDataType] = GetJsonValue<std::string>(*input_desc, kJDtype);
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_json/tbe_json_creator.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_json/tbe_json_creator.cc
index 24c892e7df9..c246a4ea914 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_json/tbe_json_creator.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_json/tbe_json_creator.cc
@@ -426,8 +426,7 @@ void TbeJsonCreator::GenDescJson(const AnfNodePtr &anf_node, size_t node_out_idx
   auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
   format = tbe::TbeAdapter::FormatPass(format, ori_shape.size());
   auto def_format = TbeJsonUtils::IsNeedChangeDefaultFormat(anf_node) ? kOpFormat_NCDHW : kOpFormat_NCHW;
-  format =
-    (def_format == kOpFormat_NCDHW && k3DFormatSet.find(format) == k3DFormatSet.end()) ? kOpFormat_NCDHW : format;
+  format = (def_format == kOpFormat_NCDHW && !IsOneOf3DFormat(format)) ? kOpFormat_NCDHW : format;
 
   (*output_desc)[kJDataType] = tbe::TypeIdToString(AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx));
   (*output_desc)[kJDtype] = GetJsonValue<std::string>(*output_desc, kJDataType);
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/common_utils.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/common_utils.cc
index 5117a30b470..64d9262a133 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/common_utils.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/common_utils.cc
@@ -70,12 +70,12 @@ std::vector<int64_t> HostCheck::GetFinalInferShape(const AnfNodePtr &node, const
   }
 
   auto temp_shape = infer_shape;
-  if (kNoPaddingFormatSet.find(format) == kNoPaddingFormatSet.end() && format != kOpFormat_FRACTAL_ZN_LSTM &&
-      infer_shape.size() < kShape4dDims && k3DFormatSet.find(format) == k3DFormatSet.end()) {
+  if (!IsOneOfNoPaddingFormat(format) && format != kOpFormat_FRACTAL_ZN_LSTM && infer_shape.size() < kShape4dDims &&
+      !IsOneOf3DFormat(format)) {
     MS_LOG(DEBUG) << "Get Device Shape using a shape size is less than 4 ,should be Padding shape by Default firstly";
     temp_shape = trans::PaddingShapeTo4dDefault(infer_shape, node);
   }
-  if (infer_shape.size() != kNcdhwShapeSize && k3DFormatSet.find(format) != k3DFormatSet.end()) {
+  if (infer_shape.size() != kNcdhwShapeSize && IsOneOf3DFormat(format)) {
     temp_shape = trans::PaddingShapeTo5dDefault(infer_shape, node);
   }
   return temp_shape;
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc
index e75a007705c..ab082e84aca 100644
--- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc
+++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.cc
@@ -41,7 +41,6 @@ constexpr auto kPrefixOutput = "output";
 constexpr char kParamTypeDynamic[] = "dynamic";
 constexpr char kParamTypeRequre[] = "required";
 constexpr char kParamTypeOptional[] = "optional";
-mindspore::HashMap<std::string, std::vector<std::shared_ptr<KernelBuildInfo>>> TbeKernelSelect::select_cache_ = {};
 
 void TbeMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
   auto tbe_selecter = TbeKernelSelect(kernel_node, kernel_info_list);
@@ -236,7 +235,7 @@ void TbeKernelSelect::GetAgnosticPatternKernelInfo(const OpInfo &op_info) {
     MS_LOG(EXCEPTION) << "AgnosticPattern only support one input.";
   }
   auto format = AnfAlgo::GetPrevNodeOutputFormat(cnode_ptr_, 0);
-  if (kOpFormatList.find(format) == kOpFormatList.end()) {
+  if (!IsOneOfFormat(format)) {
     MS_LOG(INFO) << "Got the unknown format " << format;
     format = kOpFormat_DEFAULT;
   }
@@ -332,7 +331,7 @@ bool TbeKernelSelect::IsShapeMatchFormat(const ShapeVector &shape, const std::st
   }
   static const std::set<std::string> kServerNotSupportFormat = {kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04};
   // if format is default, it remarkes support all format
-  if (kOpFormatList.find(format) == kOpFormatList.end()) {
+  if (!IsOneOfFormat(format)) {
     MS_LOG(EXCEPTION) << "Got the unknown format " << format;
   }
   // server not support format with C04 suffix
@@ -346,7 +345,7 @@ bool TbeKernelSelect::IsShapeMatchFormat(const ShapeVector &shape, const std::st
   }
   // not support format:
   // 1 3d formats with shape size > 5
-  if (k3DFormatSet.find(format) != k3DFormatSet.end() && shape.size() > kShape5dDims) {
+  if (IsOneOf3DFormat(format) && shape.size() > kShape5dDims) {
     return false;
   }
   return true;
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h
diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h index f8948762d00..2c66bf966a6 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h @@ -80,7 +80,7 @@ class TbeKernelSelect { nlohmann::json kernel_json; std::string kernel_hash_name; bool check_cnode; - static mindspore::HashMap>> select_cache_; + inline static mindspore::HashMap>> select_cache_ = {}; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_comm_op_reuse.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_comm_op_reuse.cc index f30892cd97e..4558dd3ba77 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_comm_op_reuse.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_comm_op_reuse.cc @@ -20,6 +20,7 @@ #include "include/common/utils/anfalgo.h" #include "include/common/utils/comm_manager.h" #include "include/common/utils/parallel_context.h" +#include "runtime/graph_scheduler/graph_compiler.h" #include "plugin/device/ascend/hal/device/ascend_stream_assign.h" #include "plugin/device/ascend/optimizer/ascend_comm_op_reuse.h" diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_helper.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_helper.h index b2d8703ce45..71b1270d275 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_helper.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ascend_helper.h @@ -29,6 +29,7 @@ #include "plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h" namespace mindspore { +const std::set<TypeId> kFloatDataTypeSet = {kNumberTypeFloat16, kNumberTypeFloat32}; namespace opt { class KernelSelect { public: diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_attr_for_3d_graph.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_attr_for_3d_graph.cc index 485929e21be..12a8017c47b 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_attr_for_3d_graph.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_attr_for_3d_graph.cc @@ -32,7 +32,7 @@ const AnfNodePtr AddIoFormatAttrFor3DGraph::Process(const FuncGraphPtr &func_gra common::AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); auto formats = AnfAlgo::GetAllOutputFormats(node); if (std::any_of(formats.begin(), formats.end(), - [](const std::string &format) { return k3DFormatSet.find(format) != k3DFormatSet.end(); })) { + [](const std::string &format) { return IsOneOf3DFormat(format); })) { common::AnfAlgo::SetNodeAttr(kAttrFormat, MakeValue(kOpFormat_NCDHW), node); } return node; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/check_consistency.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/check_consistency.cc index fbc23a25bef..31e299cf09c 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/check_consistency.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/check_consistency.cc @@ -28,6 +28,9 @@ namespace mindspore { namespace opt { namespace { +const std::set<std::string> kDefaultCompatibleFormat = {kOpFormat_ND, kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_HWCN, + kOpFormat_NCDHW}; + bool CheckFormatForConsistency(const CNodePtr &node, const size_t input_index) { MS_EXCEPTION_IF_NULL(node); // get prior node's device output format
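The new kDefaultCompatibleFormat constant suggests the rule the consistency check applies: a prior node's output format and the selected input format presumably pass when they are equal, or when both are merely different spellings of the default layout. A hedged reconstruction of that predicate (the body of CheckFormatForConsistency is not shown in the hunk, so this is illustrative only):

#include <set>
#include <string>

// Illustrative sketch; the real check in check_consistency.cc may differ.
bool FormatsConsistent(const std::string &pre_output_format, const std::string &selected_input_format) {
  static const std::set<std::string> kDefaultCompatibleFormat = {"ND", "NCHW", "NHWC", "HWCN", "NCDHW"};
  if (pre_output_format == selected_input_format) {
    return true;
  }
  // Default-like layouts are treated as interchangeable.
  return kDefaultCompatibleFormat.count(pre_output_format) > 0 &&
         kDefaultCompatibleFormat.count(selected_input_format) > 0;
}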
diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/rectify_do_mask_kernel_info.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/rectify_do_mask_kernel_info.cc index d1dba89d08b..6523b3bfd93 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/rectify_do_mask_kernel_info.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/format_type/rectify_do_mask_kernel_info.cc @@ -15,7 +15,6 @@ */ #include "plugin/device/ascend/optimizer/format_type/rectify_do_mask_kernel_info.h" - #include "backend/common/session/anf_runtime_algorithm.h" #include "include/common/utils/anfalgo.h" #include "kernel/kernel_build_info.h" @@ -67,7 +66,7 @@ void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector<CNodePtr> &do_ std::string convert_format; for (const auto &do_mask : do_mask_node_list) { auto do_mask_data_format = AnfAlgo::GetInputFormat(do_mask, 0); - if (special_format.empty() && kHWSpecialFormatSet.find(do_mask_data_format) != kHWSpecialFormatSet.end()) { + if (special_format.empty() && IsOneOfHWSpecialFormat(do_mask_data_format)) { special_format = do_mask_data_format; } if (format_counter.find(do_mask_data_format) == format_counter.end()) { @@ -99,7 +98,7 @@ std::string RectifyDoMaskKernelInfo::GetConvertFormat(const std::map(AnfAlgo::GetSelectKernelBuildInfo(node)); MS_EXCEPTION_IF_NULL(builder); - if (in_format == kOpFormat_DEFAULT && k3DFormatSet.find(out_format) != k3DFormatSet.end()) { + if (in_format == kOpFormat_DEFAULT && IsOneOf3DFormat(out_format)) { builder->SetInputsFormat({kOpFormat_NCDHW}); builder->SetOutputsFormat({out_format}); AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); common::AnfAlgo::SetNodeAttr(kAttrSrcFormat, MakeValue(kOpFormat_NCDHW), node); } - if (out_format == kOpFormat_DEFAULT && k3DFormatSet.find(in_format) != k3DFormatSet.end()) { + if (out_format == kOpFormat_DEFAULT && IsOneOf3DFormat(in_format)) { builder->SetInputsFormat({in_format}); builder->SetOutputsFormat({kOpFormat_NCDHW}); AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/seed_adapter.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/seed_adapter.cc index a76f36a826e..56392894f29 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/seed_adapter.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fission/seed_adapter.cc @@ -30,6 +30,8 @@ namespace mindspore::opt { namespace { +const std::set<std::string> kNodeWithSeedOperators = {kGammaOpName, kPoissonOpName, kStandardLaplaceOpName, + kStandardNormalOpName, kUniformIntOpName, kUniformRealOpName}; tensor::TensorPtr CreateTensor(int64_t seed) { // 1 create seed tensor std::vector<int64_t> indices_shape = {1}; diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_fusion.cc index a7fef2bdd67..c0b863b81f7 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_fusion.cc @@ -14,10 +14,8 @@ * limitations under the License.
*/ #include "plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_fusion.h" -#include "backend/common/optimizer/helper.h" #include "backend/common/session/anf_runtime_algorithm.h" -#include "include/common/utils/anfalgo.h" -#include "utils/trace_base.h" +#include "plugin/device/ascend/optimizer/ascend_helper.h" namespace mindspore { namespace opt { const BaseRef AdamApplyOneFusion::DefinePattern() const { diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_with_decay_rule.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_with_decay_rule.cc index 59751e935e7..dcb79ee6dc6 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_with_decay_rule.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_with_decay_rule.cc @@ -15,6 +15,7 @@ */ #include "plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_with_decay_rule.h" #include "backend/common/session/anf_runtime_algorithm.h" +#include "plugin/device/ascend/optimizer/ascend_helper.h" #include "include/common/utils/anfalgo.h" #include "ir/primitive.h" #include "backend/common/optimizer/helper.h" diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_rule.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_rule.cc index ea1be4bc33f..c5eb3df8682 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_rule.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_rule.cc @@ -21,6 +21,7 @@ #include "include/common/utils/utils.h" #include "backend/common/optimizer/helper.h" #include "mindspore/core/ops/core_ops.h" +#include "plugin/device/ascend/optimizer/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_rule.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_rule.cc index 2e299e03b19..560a4180f49 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_rule.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_rule.cc @@ -16,6 +16,7 @@ #include "plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_rule.h" #include #include "backend/common/session/anf_runtime_algorithm.h" +#include "plugin/device/ascend/optimizer/ascend_helper.h" #include "include/common/utils/anfalgo.h" #include "frontend/optimizer/opt.h" #include "utils/trace_base.h" diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc index ee435cfabcb..b3db5cd4b3f 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_v1_rule.cc @@ -19,7 +19,7 @@ #include #include #include - +#include "plugin/device/ascend/optimizer/ascend_helper.h" #include "backend/common/session/anf_runtime_algorithm.h" #include "include/common/utils/anfalgo.h" #include "frontend/optimizer/opt.h" diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_right_rule.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_right_rule.cc index c5fbec4d5bd..9a7ddfe7b26 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_right_rule.cc +++ 
b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_next_right_rule.cc @@ -16,6 +16,7 @@ #include "plugin/device/ascend/optimizer/ir_fusion/lamb_next_right_rule.h" #include #include "backend/common/optimizer/helper.h" +#include "plugin/device/ascend/optimizer/ascend_helper.h" #include "utils/trace_base.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_update_with_lr_rule_fusion.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_update_with_lr_rule_fusion.cc index ecf0c70661e..1373171cadc 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_update_with_lr_rule_fusion.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_update_with_lr_rule_fusion.cc @@ -17,7 +17,7 @@ #include #include - +#include "plugin/device/ascend/optimizer/ascend_helper.h" #include "backend/common/session/anf_runtime_algorithm.h" #include "include/common/utils/anfalgo.h" #include "ir/primitive.h" diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_update_with_lr_v2.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_update_with_lr_v2.cc index acf8a26196d..93eecbc536d 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_update_with_lr_v2.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ir_fusion/lamb_update_with_lr_v2.cc @@ -19,6 +19,7 @@ #include #include "include/common/utils/utils.h" #include "mindspore/core/ops/core_ops.h" +#include "plugin/device/ascend/optimizer/ascend_helper.h" namespace mindspore { namespace opt { diff --git a/mindspore/ccsrc/plugin/device/cpu/hal/device/kernel_select_cpu.cc b/mindspore/ccsrc/plugin/device/cpu/hal/device/kernel_select_cpu.cc index 3d44519feec..643928b47d7 100644 --- a/mindspore/ccsrc/plugin/device/cpu/hal/device/kernel_select_cpu.cc +++ b/mindspore/ccsrc/plugin/device/cpu/hal/device/kernel_select_cpu.cc @@ -439,7 +439,7 @@ std::pair SetKernelInfoWithMsg(const CNodePtr &kerne const std::string &op_name = common::AnfAlgo::GetCNodeName(kernel_node); if (IsPrimitiveCNode(kernel_node, prim::kPrimCustom)) { auto tp = common::AnfAlgo::GetNodeAttr(kernel_node, kAttrFuncType); - if (kCustomTypeAkg.find(tp) != kCustomTypeAkg.end()) { + if (IsOneOfCustomAkgType(tp)) { UpdateCustomKernelBuildInfo(kernel_node, true); return {}; } diff --git a/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc b/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc index 60ef76c475f..6e4f46115d9 100644 --- a/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc +++ b/mindspore/ccsrc/plugin/device/cpu/hal/hardware/cpu_device_context.cc @@ -345,8 +345,7 @@ bool CPUKernelExecutor::LaunchKernel(const CNodePtr &kernel, const std::vector(kernel_mod); MS_EXCEPTION_IF_NULL(cpu_kernel_mod); cpu_kernel_mod->InitKernel(kernel); diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.cc index b2b23fc04a3..264ef7d76d5 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.cc @@ -89,8 +89,6 @@ std::vector NativeCpuKernelMod::GetSupportFromOpLib(const std::strin return support_kernel_attrs; } -mindspore::HashMap> NativeCpuKernelMod::support_map_{}; - int DeprecatedNativeCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector &inputs, const std::vector &outputs, diff --git 
a/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h index b2c3e1e79e6..5a248e62f12 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel.h @@ -169,7 +169,7 @@ class BACKEND_EXPORT NativeCpuKernelMod : public CpuKernelMod { private: std::vector GetAllSupportedList(const std::string &kernel_name); std::vector GetSupportFromOpLib(const std::string &kernel_name) const; - static mindspore::HashMap> support_map_; + inline static mindspore::HashMap> support_map_; }; class BACKEND_EXPORT DeprecatedNativeCpuKernelMod : public NativeCpuKernelMod { diff --git a/mindspore/ccsrc/plugin/device/gpu/CMakeLists.txt b/mindspore/ccsrc/plugin/device/gpu/CMakeLists.txt new file mode 100644 index 00000000000..258e6e624e6 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/CMakeLists.txt @@ -0,0 +1,62 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_BINARY_DIR}) +include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS}) +add_subdirectory(kernel/cuda_impl) + +########### mindspore_gpu.so ##### +set(GPU_SUB_COMP + hal/device + hal/hardware + hal/profiler + kernel + optimizer + ) + +if(ENABLE_GPU) + foreach(g_comp ${GPU_SUB_COMP}) + add_subdirectory(${g_comp}) + string(REPLACE "/" "_" sub ${g_comp}) + if(TARGET _mindspore_plugin_device_gpu_${sub}_obj) + list(APPEND GPU_SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_plugin_device_gpu_${sub}_obj>) + add_dependencies(_mindspore_plugin_device_gpu_${sub}_obj proto_input) + endif() + endforeach() +endif() + +add_library(mindspore_gpu SHARED ${GPU_SUB_OBJECTS_SRC}) +target_link_libraries(mindspore_gpu PUBLIC mindspore_backend_common) +target_link_libraries(mindspore_gpu PRIVATE mindspore_core mindspore_common proto_input mindspore::protobuf) +target_link_libraries(mindspore_gpu PRIVATE securec) +set_target_properties(mindspore_gpu PROPERTIES INSTALL_RPATH $ORIGIN) +target_link_libraries(mindspore_gpu PRIVATE mindspore::dnnl mindspore::mkldnn nnacl) +target_link_libraries(mindspore_gpu PRIVATE mindspore::ssl mindspore::crypto) +target_link_libraries(mindspore_gpu PRIVATE mindspore::event mindspore::event_pthreads + mindspore::event_openssl -Wl,--no-as-needed mindspore::event_core ps_cache) + +if(ENABLE_GPU) + message("add gpu lib to mindspore_gpu") + target_link_libraries(mindspore_gpu PRIVATE cuda_ops + ${CUBLAS_LIBRARY_PATH} + ${CUDA_PATH}/lib64/libcurand.so + ${CUDNN_LIBRARY_PATH} + ${CUDA_PATH}/lib64/libcudart.so + ${CUDA_PATH}/lib64/stubs/libcuda.so + ${CUDA_PATH}/lib64/libcusolver.so + ${CUDA_PATH}/lib64/libcufft.so + ${CUDA_PATH}/lib64/libcusparse.so) +endif() + +if(ENABLE_DEBUGGER) + # debugger: link grpc + if(ENABLE_GPU) + target_link_libraries(mindspore_gpu PRIVATE -Wl,--no-as-needed mindspore::grpc++) + endif() +endif() + +if(ENABLE_GPU) + if(ENABLE_MPI) + set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH}) + set_target_properties(nvidia_collective PROPERTIES INSTALL_RPATH ${ORIGIN_PATH}) + set_target_properties(gpu_collective PROPERTIES INSTALL_RPATH ${ORIGIN_PATH}) + endif() +endif() diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/device/CMakeLists.txt b/mindspore/ccsrc/plugin/device/gpu/hal/device/CMakeLists.txt index 014bdf67d06..6889c852764 100644 --- a/mindspore/ccsrc/plugin/device/gpu/hal/device/CMakeLists.txt +++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/CMakeLists.txt @@ -3,11 +3,29 @@ if("${ENABLE_HIDDEN}" STREQUAL "OFF") string(REPLACE " -fvisibility=hidden" " 
-fvisibility=default" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() +list(APPEND DEVICE_SRC_LIST "ps/gpu_ps_cache.cc") if(ENABLE_GPU) - list(APPEND DEVICE_SRC_LIST "distribution/collective_init.cc") - list(APPEND DEVICE_SRC_LIST "gpu_comm_manager.cc") -else() - list(APPEND DEVICE_SRC_LIST "distribution/collective_fake_init.cc") + list(APPEND DEVICE_SRC_LIST ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/common/mem_reuse/mem_reuse.cc) + list(APPEND DEVICE_SRC_LIST ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/common/mem_reuse/mem_swap_manager.cc) + list(APPEND DEVICE_SRC_LIST ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/data_queue/data_queue.h) + file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") + list(REMOVE_ITEM DEVICE_SRC_LIST + "mpi/mpi_initializer.cc" + "distribution/collective_wrapper.cc" + "distribution/mpi_wrapper.cc" + "distribution/nccl_wrapper.cc" + "trt_loader.cc") + if(NOT ${TENSORRT_HOME} STREQUAL "") + find_path(TENSORRT_HOME_INCLUDE NvInfer.h HINTS ${TENSORRT_HOME}/include) + if(TENSORRT_HOME_INCLUDE STREQUAL TENSORRT_HOME_INCLUDE-NOTFOUND) + message(FATAL_ERROR "Tensor-RT dir not exist ${TENSORRT_HOME}") + endif() + message("Enable GPU inference. Tensor-RT include dir: ${TENSORRT_HOME_INCLUDE}") + set(ENABLE_GPU_INFER TRUE) + add_compile_definitions(ENABLE_GPU_INFER) + include_directories(${TENSORRT_HOME_INCLUDE}) + list(APPEND DEVICE_SRC_LIST ${CMAKE_CURRENT_SOURCE_DIR}/trt_loader.cc) + endif() endif() if(ENABLE_GPU) @@ -19,8 +37,6 @@ if(ENABLE_GPU) endif() file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" "*.cu") - - #set(GPU_QUEUE_SRCS "blocking_queue.cc" "gpu_buffer_mgr.cc" "data_queue.cc") set(GPU_COLLECTIVE_SRCS "distribution/collective_wrapper.cc" "distribution/mpi_wrapper.cc" "distribution/nccl_wrapper.cc") @@ -36,8 +52,6 @@ if(ENABLE_GPU) target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl) target_link_libraries(_ms_mpi PRIVATE gpu_collective) endif() - - # add_library(_mindspore_device_cuda_obj OBJECT ${CUDA_SRC_LIST}) endif() set_property(SOURCE ${DEVICE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_data_queue.h b/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_data_queue.h index e8ec407f4d9..a673740f42d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_data_queue.h +++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_data_queue.h @@ -24,10 +24,11 @@ #include #include "runtime/data_queue/data_queue.h" #include "runtime/hardware/device_context_manager.h" +#include "include/backend/visible.h" namespace mindspore { namespace device { -class GpuDataQueueDynamic : public DataQueue { +class BACKEND_EXPORT GpuDataQueueDynamic : public DataQueue { public: explicit GpuDataQueueDynamic(const size_t capacity); virtual ~GpuDataQueueDynamic() = default; @@ -49,7 +50,7 @@ class GpuDataQueueDynamic : public DataQueue { std::unique_ptr node_info_; }; -class GpuQueue : public DataQueue { +class BACKEND_EXPORT GpuQueue : public DataQueue { public: GpuQueue(void *addr, const std::vector &shape, const size_t &capacity); virtual ~GpuQueue(); diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/device/kernel_info_setter.cc b/mindspore/ccsrc/plugin/device/gpu/hal/device/kernel_info_setter.cc index fb2f03dc56b..a47b1761060 100644 --- a/mindspore/ccsrc/plugin/device/gpu/hal/device/kernel_info_setter.cc +++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/kernel_info_setter.cc @@ -193,7 +193,7 @@ bool 
SelectCustomKernel(const CNodePtr &kernel_node, const std::shared_ptr::Instance().Register( op_name, []() { return std::make_shared(); }); } - } else if (kCustomTypeAkg.find(func_type) != kCustomTypeAkg.end()) { + } else if (IsOneOfCustomAkgType(func_type)) { *kernel_type = KernelType::AKG_KERNEL; } else { MS_LOG(EXCEPTION) << "Unsupported func type [" << func_type << "] for Custom op [" << op_name << "] on GPU"; diff --git a/mindspore/ccsrc/ps/ps_cache/gpu/gpu_ps_cache.cc b/mindspore/ccsrc/plugin/device/gpu/hal/device/ps/gpu_ps_cache.cc similarity index 98% rename from mindspore/ccsrc/ps/ps_cache/gpu/gpu_ps_cache.cc rename to mindspore/ccsrc/plugin/device/gpu/hal/device/ps/gpu_ps_cache.cc index cd4632e1c9c..0a7853bffd3 100644 --- a/mindspore/ccsrc/ps/ps_cache/gpu/gpu_ps_cache.cc +++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/ps/gpu_ps_cache.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ps/ps_cache/gpu/gpu_ps_cache.h" +#include "plugin/device/gpu/hal/device/ps/gpu_ps_cache.h" #include "ps/ps_cache/ps_cache_factory.h" #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/hash_impl.cuh" #include "plugin/device/gpu/hal/device/gpu_common.h" diff --git a/mindspore/ccsrc/ps/ps_cache/gpu/gpu_ps_cache.h b/mindspore/ccsrc/plugin/device/gpu/hal/device/ps/gpu_ps_cache.h similarity index 89% rename from mindspore/ccsrc/ps/ps_cache/gpu/gpu_ps_cache.h rename to mindspore/ccsrc/plugin/device/gpu/hal/device/ps/gpu_ps_cache.h index 6ef18becada..b3442c2c9db 100644 --- a/mindspore/ccsrc/ps/ps_cache/gpu/gpu_ps_cache.h +++ b/mindspore/ccsrc/plugin/device/gpu/hal/device/ps/gpu_ps_cache.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_GPU_GPU_PS_CACHE_H_ -#define MINDSPORE_CCSRC_PS_PS_CACHE_GPU_GPU_PS_CACHE_H_ +#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_ +#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_ #include #include @@ -47,4 +47,4 @@ class GPUPsCache : public PsCacheBasic { } // namespace gpu } // namespace ps } // namespace mindspore -#endif // MINDSPORE_CCSRC_PS_PS_CACHE_GPU_GPU_PS_CACHE_H_ +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_session.cc b/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_session.cc index dc60c74c488..4d3fe57e347 100644 --- a/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_session.cc +++ b/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_session.cc @@ -659,8 +659,7 @@ KernelGraphPtr GPUSession::BuildOpImpl(const BackendOpRunInfoPtr &op_run_info, c const std::vector &tensors_mask) { // Check if the graph cache exists. 
auto it = run_op_graphs_.find(graph_info); - if (it != run_op_graphs_.end() && - kOpCacheBlackList.find(op_run_info->base_op_run_info.op_name) == kOpCacheBlackList.end()) { + if (it != run_op_graphs_.end() && !IsOneOfCacheBlackList(op_run_info->base_op_run_info.op_name)) { return it->second; } @@ -718,7 +717,7 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, const BackendOpRunInfoPt UpdateOutputAbstract(kernel_graph, op_run_info); } RunOpClearMemory(kernel_graph.get()); - if (kOpCacheBlackList.find(op_run_info->base_op_run_info.op_name) != kOpCacheBlackList.end()) { + if (IsOneOfCacheBlackList(op_run_info->base_op_run_info.op_name)) { run_op_graphs_.erase(graph_info); } } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/CMakeLists.txt b/mindspore/ccsrc/plugin/device/gpu/kernel/CMakeLists.txt index e95c5ed0716..4f2f6f821fa 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/CMakeLists.txt +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/CMakeLists.txt @@ -3,8 +3,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") endif() if(ENABLE_GPU) - file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cu") - file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") file(GLOB_RECURSE _AKG_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "akg/*.cc") list(REMOVE_ITEM GPU_SRC_LIST ${_AKG_SRC_LIST}) diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/CMakeLists.txt b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/CMakeLists.txt similarity index 84% rename from mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/CMakeLists.txt rename to mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/CMakeLists.txt index 90edfd081ab..386b2acd466 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/CMakeLists.txt +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/CMakeLists.txt @@ -12,11 +12,12 @@ endif() set_property(SOURCE ${CUDA_OPS_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL) if(ENABLE_GPU) - add_library(cuda_common_obj OBJECT cuda_common.cc cuda_device_info.cc) + add_library(cuda_common_obj OBJECT cuda_ops/cuda_common.cc cuda_ops/cuda_device_info.cc) target_compile_options(cuda_common_obj PRIVATE "-std=c++17") cuda_add_library(cuda_ops SHARED ${CUDA_OPS_SRC_LIST} $<TARGET_OBJECTS:cuda_common_obj>) message("add gpu lib to cuda_ops") - target_link_libraries(cuda_ops mindspore_core cublas + target_link_libraries(cuda_ops mindspore_core + ${CUBLAS_LIBRARY_PATH} ${CUDA_PATH}/lib64/libcurand.so ${CUDNN_LIBRARY_PATH} ${CUDA_PATH}/lib64/libcudart.so
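From here on, the .cu and .cuh hunks stamp CUDA_LIB_EXPORT on the host-side launcher functions so their symbols stay visible once cuda_ops is built as a standalone shared library, typically compiled with hidden default visibility. The macro definition itself is not in this patch; presumably it lives in cuda_device_info.h, which these hunks start including. A conventional shape for such a macro, offered as an assumption rather than the actual definition:

#include <cuda_runtime.h>

// Assumed shape of the export macro; not taken from this patch.
#if defined(_WIN32)
#define CUDA_LIB_EXPORT __declspec(dllexport)
#else
#define CUDA_LIB_EXPORT __attribute__((visibility("default")))
#endif

// Usage mirrors the hunks below: the host-callable launcher declaration is
// exported, while the __global__ kernels behind it stay internal to the library.
template <typename T>
CUDA_LIB_EXPORT void CalSomethingHypothetical(const T *input, T *output, cudaStream_t stream);

Note the companion build change above: cuda_common_obj is now built from cuda_ops/cuda_common.cc and cuda_ops/cuda_device_info.cc, matching the directory rename.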
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/lp_norm_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/lp_norm_impl.cu index e5f19aaecbb..354c35e030e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/lp_norm_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/lp_norm_impl.cu @@ -69,10 +69,11 @@ __global__ void NormCalHighPrecisionKernel(const float *middle_output, T *output} } template <> -void CalLpNorm(const float *input, const size_t *input_shape, size_t input_shape_length, size_t input_elements, - const size_t *output_axis, const size_t *output_stride, size_t output_shape_length, - size_t output_elements, float p, float eps, float *middle_output, float *output, - const uint32_t &device_id, cudaStream_t cuda_stream) { +CUDA_LIB_EXPORT void CalLpNorm(const float *input, const size_t *input_shape, size_t input_shape_length, + size_t input_elements, const size_t *output_axis, const size_t *output_stride, + size_t output_shape_length, size_t output_elements, float p, float eps, + float *middle_output, float *output, const uint32_t &device_id, + cudaStream_t cuda_stream) { LpCalKernel<<>>( input, input_shape, input_shape_length, input_elements, output_axis, output_stride, output_shape_length, p, eps, output); @@ -81,10 +82,11 @@ void CalLpNorm(const float *input, const size_t *input_shape, size_t inpu } template <> -void CalLpNorm(const half *input, const size_t *input_shape, size_t input_shape_length, size_t input_elements, - const size_t *output_axis, const size_t *output_stride, size_t output_shape_length, - size_t output_elements, float p, float eps, float *middle_output, half *output, - const uint32_t &device_id, cudaStream_t cuda_stream) { +CUDA_LIB_EXPORT void CalLpNorm(const half *input, const size_t *input_shape, size_t input_shape_length, + size_t input_elements, const size_t *output_axis, const size_t *output_stride, + size_t output_shape_length, size_t output_elements, float p, float eps, + float *middle_output, half *output, const uint32_t &device_id, + cudaStream_t cuda_stream) { LpCalKernel<<>>( input, input_shape, input_shape_length, input_elements, output_axis, output_stride, output_shape_length, p, eps, middle_output); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/multi_margin_loss_grad_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/multi_margin_loss_grad_impl.cu index 55ea8d59db6..e273cae4ce5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/multi_margin_loss_grad_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/multi_margin_loss_grad_impl.cu @@ -133,9 +133,9 @@ __global__ void MultiMarginLoss_backward_kernel_half(half *gradInput, const half // namespace str template <typename T> -void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, int dim, const T *output_grad, - const T *input, const int64_t *target, const T *weight, T *output, const uint32_t &device_id, - cudaStream_t cuda_stream) { +CUDA_LIB_EXPORT void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, int dim, + const T *output_grad, const T *input, const int64_t *target, const T *weight, + T *output, const uint32_t &device_id, cudaStream_t cuda_stream) { dim3 blocks1(nframe); dim3 threads1(MULTIMARGIN_THREADS); bool reduce = false; @@ -158,9 +158,10 @@ void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, // namespace str template <> -void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, int dim, const half *output_grad, - const half *input, const int64_t *target, const half *weight, half *output, - const uint32_t &device_id, cudaStream_t cuda_stream) { +CUDA_LIB_EXPORT void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, int dim, + const half *output_grad, const half *input, const int64_t *target, + const half *weight, half *output, const uint32_t &device_id, + cudaStream_t cuda_stream) { dim3 blocks1(nframe); dim3 threads1(MULTIMARGIN_THREADS); bool reduce = false; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/multi_margin_loss_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/multi_margin_loss_impl.cu index aae43eddd8e..d5c86daaeb1 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/multi_margin_loss_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/multi_margin_loss_impl.cu @@ 
-286,9 +286,9 @@ __global__ void MultiMarginLossReduceKernel(int dim, T *output) { // namespace str template -void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int dim, const T *input, - const int64_t *target, const T *weight, T *output, const uint32_t &device_id, - cudaStream_t cuda_stream) { +CUDA_LIB_EXPORT void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int dim, const T *input, + const int64_t *target, const T *weight, T *output, const uint32_t &device_id, + cudaStream_t cuda_stream) { dim3 blocks(nframe); dim3 threads(MULTIMARGIN_THREADS); bool sizeAverage = false; @@ -311,9 +311,9 @@ void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int // namespace str template <> -void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int dim, const half *input, - const int64_t *target, const half *weight, half *output, const uint32_t &device_id, - cudaStream_t cuda_stream) { +CUDA_LIB_EXPORT void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int dim, const half *input, + const int64_t *target, const half *weight, half *output, const uint32_t &device_id, + cudaStream_t cuda_stream) { dim3 blocks(nframe); dim3 threads(128); bool sizeAverage = false; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/renorm_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/renorm_impl.cu index e5acf51726f..932c01ec278 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/renorm_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/renorm_impl.cu @@ -58,8 +58,7 @@ __global__ void CalNormValFun1(const Complex *input, size_t input_elemen } __global__ void CalNormValFun2(float *norm_value, int p, size_t axis_size, float max_norm) { - for (size_t index = blockIdx.x * blockDim.x + threadIdx.x; index < (axis_size); - index += blockDim.x * gridDim.x) { + for (size_t index = blockIdx.x * blockDim.x + threadIdx.x; index < (axis_size); index += blockDim.x * gridDim.x) { float temp = pow(norm_value[index], static_cast(1.0 / p)); if (temp > max_norm) { norm_value[index] = max_norm / temp; @@ -84,61 +83,61 @@ __global__ void CalNormValFun3(const T *input, size_t inner_size, size_t axis_si } template <> -void CalRenorm(const half *input, size_t input_elements, size_t inner_size, size_t axis_size, int p, - float *norm_value, half *output, const uint32_t &device_id, cudaStream_t cuda_stream, - float max_norm) { +CUDA_LIB_EXPORT void CalRenorm(const half *input, size_t input_elements, size_t inner_size, size_t axis_size, + int p, float *norm_value, half *output, const uint32_t &device_id, + cudaStream_t cuda_stream, float max_norm) { CalNormValFun1<<>>( input, input_elements, inner_size, axis_size, p, norm_value); - CalNormValFun2<<>>( - norm_value, p, axis_size, max_norm); + CalNormValFun2<<>>(norm_value, p, + axis_size, max_norm); CalNormValFun3<<>>( input, inner_size, axis_size, input_elements, output, norm_value); } template <> -void CalRenorm(const float *input, size_t input_elements, size_t inner_size, size_t axis_size, int p, - float *norm_value, float *output, const uint32_t &device_id, cudaStream_t cuda_stream, - float max_norm) { +CUDA_LIB_EXPORT void CalRenorm(const float *input, size_t input_elements, size_t inner_size, size_t axis_size, + int p, float *norm_value, float *output, const uint32_t &device_id, + cudaStream_t cuda_stream, float max_norm) { CalNormValFun1<<>>( input, input_elements, inner_size, 
axis_size, p, norm_value); - CalNormValFun2<<>>( - norm_value, p, axis_size, max_norm); + CalNormValFun2<<>>(norm_value, p, + axis_size, max_norm); CalNormValFun3<<>>( input, inner_size, axis_size, input_elements, output, norm_value); } template <> -void CalRenorm(const double *input, size_t input_elements, size_t inner_size, size_t axis_size, int p, - float *norm_value, double *output, const uint32_t &device_id, cudaStream_t cuda_stream, - float max_norm) { +CUDA_LIB_EXPORT void CalRenorm(const double *input, size_t input_elements, size_t inner_size, size_t axis_size, + int p, float *norm_value, double *output, const uint32_t &device_id, + cudaStream_t cuda_stream, float max_norm) { CalNormValFun1<<>>( input, input_elements, inner_size, axis_size, p, norm_value); - CalNormValFun2<<>>( - norm_value, p, axis_size, max_norm); + CalNormValFun2<<>>(norm_value, p, + axis_size, max_norm); CalNormValFun3<<>>( input, inner_size, axis_size, input_elements, output, norm_value); } template <> -void CalRenorm>(const Complex *input, size_t input_elements, size_t inner_size, size_t axis_size, - int p, float *norm_value, Complex *output, const uint32_t &device_id, - cudaStream_t cuda_stream, float max_norm) { +CUDA_LIB_EXPORT void CalRenorm>(const Complex *input, size_t input_elements, size_t inner_size, + size_t axis_size, int p, float *norm_value, Complex *output, + const uint32_t &device_id, cudaStream_t cuda_stream, float max_norm) { CalNormValFun1<<>>( input, input_elements, inner_size, axis_size, p, norm_value); - CalNormValFun2<<>>( - norm_value, p, axis_size, max_norm); + CalNormValFun2<<>>(norm_value, p, + axis_size, max_norm); CalNormValFun3<<>>( input, inner_size, axis_size, input_elements, output, norm_value); } template <> -void CalRenorm>(const Complex *input, size_t input_elements, size_t inner_size, - size_t axis_size, int p, float *norm_value, Complex *output, - const uint32_t &device_id, cudaStream_t cuda_stream, float max_norm) { +CUDA_LIB_EXPORT void CalRenorm>(const Complex *input, size_t input_elements, size_t inner_size, + size_t axis_size, int p, float *norm_value, Complex *output, + const uint32_t &device_id, cudaStream_t cuda_stream, float max_norm) { CalNormValFun1<<>>( input, input_elements, inner_size, axis_size, p, norm_value); - CalNormValFun2<<>>( - norm_value, p, axis_size, max_norm); + CalNormValFun2<<>>(norm_value, p, + axis_size, max_norm); CalNormValFun3<<>>( input, inner_size, axis_size, input_elements, output, norm_value); } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/scale_grad_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/scale_grad_impl.cu index 50694ecade4..c872e183395 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/scale_grad_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/scale_grad_impl.cu @@ -28,16 +28,16 @@ __global__ void ScaleGrad(const int nums, const T *x0, const S &x1, T *y) { } template -void ScaleGradKernel(const int &nums, const T *x0, const S &x1, T *y, cudaStream_t stream) { +CUDA_LIB_EXPORT void ScaleGradKernel(const int &nums, const T *x0, const S &x1, T *y, cudaStream_t stream) { ScaleGrad<<<(nums + 255) / 256, 256, 0, stream>>>(nums, x0, x1, y); return; } -template void ScaleGradKernel(const int &nums, const float *x0, const float &x1, float *y, - cudaStream_t stream); -template void ScaleGradKernel(const int &nums, const float *x0, const half &x1, float *y, - cudaStream_t stream); -template void ScaleGradKernel(const int &nums, const half 
*x0, const float &x1, half *y, - cudaStream_t stream); -template void ScaleGradKernel(const int &nums, const half *x0, const half &x1, half *y, - cudaStream_t stream); +template CUDA_LIB_EXPORT void ScaleGradKernel(const int &nums, const float *x0, const float &x1, float *y, + cudaStream_t stream); +template CUDA_LIB_EXPORT void ScaleGradKernel(const int &nums, const float *x0, const half &x1, float *y, + cudaStream_t stream); +template CUDA_LIB_EXPORT void ScaleGradKernel(const int &nums, const half *x0, const float &x1, half *y, + cudaStream_t stream); +template CUDA_LIB_EXPORT void ScaleGradKernel(const int &nums, const half *x0, const half &x1, half *y, + cudaStream_t stream); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/scale_grad_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/scale_grad_impl.cuh index 172337c3566..8f9151dc803 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/scale_grad_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/scale_grad_impl.cuh @@ -18,6 +18,7 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_SCALE_GRAD_IMPL_H_ #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" template <typename T, typename S> void ScaleGradKernel(const int &nums, const T *x0, const S &x1, T *y, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPLIT_H_
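The scale_grad hunks above show the subtler half of the visibility change: for templates, it is the explicit instantiations, not just the primary declaration, that carry CUDA_LIB_EXPORT, because those are the concrete symbols other libraries link against. A self-contained sketch of the pattern with a hypothetical axpy launcher (not from the patch); the (n + 255) / 256 launch shape copies the ScaleGrad hunk:

#include <cuda_runtime.h>

#ifndef CUDA_LIB_EXPORT
#define CUDA_LIB_EXPORT __attribute__((visibility("default")))  // assumption, as sketched earlier
#endif

template <typename T>
__global__ void AxpyKernel(int n, T a, const T *x, T *y) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    y[i] = a * x[i] + y[i];
  }
}

// Host-side launcher: the symbol callers outside the shared object actually use.
template <typename T>
CUDA_LIB_EXPORT void Axpy(int n, T a, const T *x, T *y, cudaStream_t stream) {
  AxpyKernel<<<(n + 255) / 256, 256, 0, stream>>>(n, a, x, y);
}

// Explicit instantiations repeat CUDA_LIB_EXPORT so the concrete symbols
// keep default visibility even when the library is built with -fvisibility=hidden.
template CUDA_LIB_EXPORT void Axpy<float>(int n, float a, const float *x, float *y, cudaStream_t stream);
template CUDA_LIB_EXPORT void Axpy<double>(int n, double a, const double *x, double *y, cudaStream_t stream);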
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/discounted_return_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/discounted_return_impl.cu index 88e621dfdb5..117fdbdf050 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/discounted_return_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/discounted_return_impl.cu @@ -39,9 +39,9 @@ __global__ void DiscountedReturnKernel(const int timestep, const int num_env, co } template <typename T> -void DiscountedReturn(const int &timestep, const int &num_env, const int &num_element, const float &gamma, - const T *reward, const bool *done, const T *last_value, T *discouted_return, - cudaStream_t stream) { +CUDA_LIB_EXPORT void DiscountedReturn(const int &timestep, const int &num_env, const int &num_element, + const float &gamma, const T *reward, const bool *done, const T *last_value, + T *discouted_return, cudaStream_t stream) { // Every block process M element, 256 is a common tile size. const int element_per_step = num_env * num_element; const int element_per_block = std::min(256, element_per_step); @@ -51,9 +51,9 @@ void DiscountedReturn(const int &timestep, const int &num_env, const int &num_el done, last_value, discouted_return); } -template void DiscountedReturn(const int &timestep, const int &num_env, const int &num_element, const float &gamma, - const float *reward, const bool *done, const float *last_value, float *discouted_return, - cudaStream_t stream); -template void DiscountedReturn(const int &timestep, const int &num_env, const int &num_element, const float &gamma, - const half *reward, const bool *done, const half *last_value, half *discouted_return, - cudaStream_t stream); +template CUDA_LIB_EXPORT void DiscountedReturn(const int &timestep, const int &num_env, const int &num_element, + const float &gamma, const float *reward, const bool *done, + const float *last_value, float *discouted_return, cudaStream_t stream); +template CUDA_LIB_EXPORT void DiscountedReturn(const int &timestep, const int &num_env, const int &num_element, + const float &gamma, const half *reward, const bool *done, + const half *last_value, half *discouted_return, cudaStream_t stream); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/discounted_return_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/discounted_return_impl.cuh index 2e69f5d12b8..eab53e03634 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/discounted_return_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/discounted_return_impl.cuh @@ -16,8 +16,9 @@ #ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_DISCONTED_RETURN_IMPL_H_ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_DISCONTED_RETURN_IMPL_H_ - +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" template <typename T> -void DiscountedReturn(const int &timestep, const int &num_env, const int &num_element, const float &gamma, - const T *reward, const bool *done, const T *last_value, T *discouted_return, cudaStream_t stream); +CUDA_LIB_EXPORT void DiscountedReturn(const int &timestep, const int &num_env, const int &num_element, + const float &gamma, const T *reward, const bool *done, const T *last_value, + T *discouted_return, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_DISCONTED_RETURN_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/priority_replay_buffer.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/priority_replay_buffer.cuh index 6c6be21ad44..c8b32ef88a7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/priority_replay_buffer.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/priority_replay_buffer.cuh @@ -18,21 +18,22 @@ #define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMP_PRIORITY_REPLAY_BUFFER_IMPL_H_ #include - struct SumTree { float sum; float min; }; -void SumTreeInit(SumTree *tree, float *max_priority, const size_t &capacity, cudaStream_t stream); -void InitRandState(const size_t &batch_size, const uint64_t &seed, curandState *state, cudaStream_t stream); -void SumTreePush(SumTree *tree, const float &alpha, const size_t &idx, const size_t &capacity, float *priority, - float *max_priority, cudaStream_t stream); -void SumTreeSample(SumTree *tree, curandState *state, const size_t &capacity, float *beta, const size_t &batch_size, - size_t *indices, float *weights, cudaStream_t stream); -void SumTreeUpdate(SumTree *tree, const size_t &capacity, const float 
&alpha, float *max_priority, size_t *indices, - float *priorities, const size_t &batch_size, cudaStream_t stream); -void FifoSlice(const uint8_t *input, const size_t *indice, uint8_t *output, size_t batch_size, size_t column, - cudaStream_t stream); +CUDA_LIB_EXPORT void SumTreeInit(SumTree *tree, float *max_priority, const size_t &capacity, cudaStream_t stream); +CUDA_LIB_EXPORT void InitRandState(const size_t &batch_size, const uint64_t &seed, curandState *state, + cudaStream_t stream); +CUDA_LIB_EXPORT void SumTreePush(SumTree *tree, const float &alpha, const size_t &idx, const size_t &capacity, + float *priority, float *max_priority, cudaStream_t stream); +CUDA_LIB_EXPORT void SumTreeSample(SumTree *tree, curandState *state, const size_t &capacity, float *beta, + const size_t &batch_size, size_t *indices, float *weights, cudaStream_t stream); +CUDA_LIB_EXPORT void SumTreeUpdate(SumTree *tree, const size_t &capacity, const float &alpha, float *max_priority, + size_t *indices, float *priorities, const size_t &batch_size, cudaStream_t stream); +CUDA_LIB_EXPORT void FifoSlice(const uint8_t *input, const size_t *indice, uint8_t *output, size_t batch_size, + size_t column, cudaStream_t stream); #endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMP_PRIORITY_REPLAY_BUFFER_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/rl_buffer_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/rl_buffer_impl.cuh index 9ffcec4e663..1f719bfe120 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/rl_buffer_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/rl_buffer_impl.cuh @@ -18,21 +18,24 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_RL_BUFFER_IMPL_H_ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" -void BufferAppend(const int64_t capacity, const size_t size, const int *index, const int exp_batch, - unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream); -void IncreaseCount(const int64_t capacity, const int exp_batch, int *count, int *head, int *index, - cudaStream_t cuda_stream); -void ReMappingIndex(const int *count, const int *head, const int *origin_index, int *index, cudaStream_t cuda_stream); -void BufferGetItem(const size_t size, const int *index, const size_t one_exp_len, const unsigned char *buffer, - unsigned char *out, cudaStream_t cuda_stream); -void CheckBatchSize(const int *count, const int *head, const size_t batch_size, const int64_t capacity, - cudaStream_t cuda_stream); -void BufferSample(const size_t size, const size_t one_element, const unsigned int *index, const unsigned char *buffer, - unsigned char *out, cudaStream_t cuda_stream); -void RandomGen(const int size, curandState *globalState, unsigned int *value, unsigned int *key, cudaStream_t stream); -void RandInit(const int size, const int seed, curandState *state, cudaStream_t stream); +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" +CUDA_LIB_EXPORT void BufferAppend(const int64_t capacity, const size_t size, const int *index, const int exp_batch, + unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream); +CUDA_LIB_EXPORT void IncreaseCount(const int64_t capacity, const int exp_batch, int *count, int *head, int *index, + cudaStream_t cuda_stream); +CUDA_LIB_EXPORT void ReMappingIndex(const int *count, const int *head, const int *origin_index, int *index, + cudaStream_t cuda_stream); +CUDA_LIB_EXPORT void BufferGetItem(const size_t size, const int *index, const 
size_t one_exp_len, + const unsigned char *buffer, unsigned char *out, cudaStream_t cuda_stream); +CUDA_LIB_EXPORT void CheckBatchSize(const int *count, const int *head, const size_t batch_size, const int64_t capacity, + cudaStream_t cuda_stream); +CUDA_LIB_EXPORT void BufferSample(const size_t size, const size_t one_element, const unsigned int *index, + const unsigned char *buffer, unsigned char *out, cudaStream_t cuda_stream); +CUDA_LIB_EXPORT void RandomGen(const int size, curandState *globalState, unsigned int *value, unsigned int *key, + cudaStream_t stream); +CUDA_LIB_EXPORT void RandInit(const int size, const int seed, curandState *state, cudaStream_t stream); template -void RandomGenUniform(const int size, curandState *globalState, const int up_bound, T *indexes, - cudaStream_t cuda_stream); +CUDA_LIB_EXPORT void RandomGenUniform(const int size, curandState *globalState, const int up_bound, T *indexes, + cudaStream_t cuda_stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/tag_env_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/tag_env_impl.cuh index e4b61b62184..bcd1c9b0ec4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/tag_env_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/rl/tag_env_impl.cuh @@ -18,6 +18,7 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_ #include +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" constexpr int kFeatureNum = 4; constexpr int kPartiallyObsFeatureNum = 6; @@ -46,14 +47,16 @@ struct AgentState { int *time_step; }; -void InitEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *state, - cudaStream_t stream); -void ResetEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state, float *state, - cudaStream_t stream); -void StepBindBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state, - const int *action, float *state, float *reward, bool *done, cudaStream_t stream); -void StepCrossBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state, - const int *action, float *state, float *reward, bool *done, float *team_reward, int *distance, - cudaStream_t stream); -void AgentStateCopy(const int env_num, const int agent_num, AgentState *dst, AgentState *src, cudaStream_t stream); +CUDA_LIB_EXPORT void InitEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *state, + cudaStream_t stream); +CUDA_LIB_EXPORT void ResetEnv(const int env_num, const int agent_num, const GameSetting *setting, + AgentState *agent_state, float *state, cudaStream_t stream); +CUDA_LIB_EXPORT void StepBindBlock(const int env_num, const int agent_num, const GameSetting *setting, + AgentState *agent_state, const int *action, float *state, float *reward, bool *done, + cudaStream_t stream); +CUDA_LIB_EXPORT void StepCrossBlock(const int env_num, const int agent_num, const GameSetting *setting, + AgentState *agent_state, const int *action, float *state, float *reward, bool *done, + float *team_reward, int *distance, cudaStream_t stream); +CUDA_LIB_EXPORT void AgentStateCopy(const int env_num, const int agent_num, AgentState *dst, AgentState *src, + cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh 
b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh index f09f72de66a..538652a1bc2 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh @@ -19,8 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k, - const float *angle_theta0, float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, + const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k, + const float *angle_theta0, float *ene, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_energy_impl.cuh index be75db9a29c..b1eeb53a72b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_energy_impl.cuh @@ -19,7 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, const int *atom_b, - const int *atom_c, const float *angle_k, const float *angle_theta0, float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, + const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0, + float *ene, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_impl.cuh index 24276a23860..15314403924 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_impl.cuh @@ -19,8 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, - const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0, float *frc_f, - cudaStream_t stream); +CUDA_LIB_EXPORT void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, + const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k, + const float *angle_theta0, float *frc_f, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_with_atom_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_with_atom_energy_impl.cuh index c8f647fc1e8..ac2445b52dc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_with_atom_energy_impl.cuh +++ 
b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_force_with_atom_energy_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k, - const float *angle_theta0, float *frc_f, float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, + const float *scaler_f, const int *atom_a, const int *atom_b, + const int *atom_c, const float *angle_k, const float *angle_theta0, + float *frc_f, float *ene, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_atom_energy_cuda_gpu_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_atom_energy_cuda_gpu_impl.cuh index 832eea77397..96f97609e87 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_atom_energy_cuda_gpu_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_atom_energy_cuda_gpu_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, - const int *atom_b, const float *bond_k, const float *bond_r0, float *atom_ene, cudaStream_t stream); +CUDA_LIB_EXPORT void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, + const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, + float *atom_ene, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_ATOM_ENERGY_GPU_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_energy_cuda_gpu_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_energy_cuda_gpu_impl.cuh index fbba36e5d38..cd728d86d4c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_energy_cuda_gpu_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_energy_cuda_gpu_impl.cuh @@ -19,9 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, - float *bond_ene, cudaStream_t stream); +CUDA_LIB_EXPORT void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, + const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k, + const float *bond_r0, float *bond_ene, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_ENERGY_CUDA_GPU_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_cuda_gpu_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_cuda_gpu_impl.cuh index e401f9deba4..5e9899a8cb9 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_cuda_gpu_impl.cuh +++ 
b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_cuda_gpu_impl.cuh @@ -19,8 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, - float *frc_f, cudaStream_t stream); +CUDA_LIB_EXPORT void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, + const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k, + const float *bond_r0, float *frc_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_FORCE_CUDA_GPU_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh index 98319adde34..fa5f0408729 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_and_virial_impl.cuh @@ -19,9 +19,11 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, - const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k, - const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v, - cudaStream_t stream); +CUDA_LIB_EXPORT void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, + const unsigned int *uint_crd_f, const float *scaler_f, + const int *atom_a, const int *atom_b, const float *bond_k, + const float *bond_r0, float *frc_f, float *atom_energy, + float *atom_v, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_impl.cuh index 4b11524afc9..4841de06336 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_energy_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, - float *frc_f, float *atom_e, cudaStream_t stream); +CUDA_LIB_EXPORT void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, + const float *scaler_f, const int *atom_a, const int *atom_b, + const float *bond_k, const float *bond_r0, float *frc_f, float *atom_e, + cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_virial_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_virial_impl.cuh index 9757c081ea1..0ebe0dbb4a6 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_virial_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/bond/bond_force_with_atom_virial_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0, - float *frc_f, float *atom_v, cudaStream_t stream); +CUDA_LIB_EXPORT void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f, + const float *scaler_f, const int *atom_a, const int *atom_b, + const float *bond_k, const float *bond_r0, float *frc_f, float *atom_v, + cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/atomcrdtocv_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/atomcrdtocv_impl.cuh index 0c235d4fd9d..85338ade301 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/atomcrdtocv_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/atomcrdtocv_impl.cuh @@ -18,9 +18,10 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_ #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f, - const float *old_crd, float *nowarp_crd, int *box_map_times, float *box, float *g_radial, - float *g_angular, cudaStream_t stream); +CUDA_LIB_EXPORT void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f, + const float *old_crd, float *nowarp_crd, int *box_map_times, float *box, + float *g_radial, float *g_angular, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_impl.cuh index 5ab563f8e1c..99f13cdeac7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_impl.cuh @@ -19,8 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f, - unsigned int *uint_crd_f, cudaStream_t stream); +CUDA_LIB_EXPORT void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f, + unsigned int *uint_crd_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_quarter_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_quarter_impl.cuh index 18b531fc58d..e2360b42fc8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_quarter_impl.cuh +++ 
b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/crd_to_uint_crd_quarter_impl.cuh @@ -19,8 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f, - unsigned int *uint_crd_f, cudaStream_t stream); +CUDA_LIB_EXPORT void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f, + unsigned int *uint_crd_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_QUARTER_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/get_center_of_mass_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/get_center_of_mass_impl.cuh index e3fcfb3bcce..75bd130e5ac 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/get_center_of_mass_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/get_center_of_mass_impl.cuh @@ -19,8 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass, - float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream); +CUDA_LIB_EXPORT void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass, + float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/getcenter_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/getcenter_impl.cuh index 6355344e09f..df366b2fede 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/getcenter_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/getcenter_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms, - const float *crd_f, float *center_of_geometry_f, cudaStream_t stream); +CUDA_LIB_EXPORT void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, + const int *center_atoms, const float *crd_f, float *center_of_geometry_f, + cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/map_center_of_mass_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/map_center_of_mass_impl.cuh index c60c9a4df4c..151ba37eb15 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/map_center_of_mass_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/map_center_of_mass_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f, - float *box_length_f, float *no_wrap_crd_f, float *crd_f, float* scaler, cudaStream_t 
stream); +CUDA_LIB_EXPORT void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f, + float *box_length_f, float *no_wrap_crd_f, float *crd_f, float *scaler, + cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/mdtemperature_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/mdtemperature_impl.cuh index 1a41f87f92e..b2b08648f3f 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/mdtemperature_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/mdtemperature_impl.cuh @@ -19,7 +19,8 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f, - const float *atom_mass, float *ek, cudaStream_t stream); +CUDA_LIB_EXPORT void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f, + const float *atom_mass, float *ek, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/total_c6_get_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/total_c6_get_impl.cuh index 1f2bdb4251e..76d582b7ae5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/total_c6_get_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/common/total_c6_get_impl.cuh @@ -18,7 +18,9 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_ #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor, cudaStream_t stream); +CUDA_LIB_EXPORT void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor, + cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/cal_no_wrap_crd_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/cal_no_wrap_crd_impl.cuh index 27116d8aa26..3e925124f70 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/cal_no_wrap_crd_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/cal_no_wrap_crd_impl.cuh @@ -20,8 +20,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f, float *nowrap_crd_f, - cudaStream_t stream); +CUDA_LIB_EXPORT void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f, + float *nowrap_crd_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_CAL_NO_WRAP_CRD_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/refresh_boxmaptimes_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/refresh_boxmaptimes_impl.cuh index 3218bd5fba5..cbd8402bf35 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/refresh_boxmaptimes_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/crdmcmap/refresh_boxmaptimes_impl.cuh @@ -20,8 +20,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse, float *crd_f, float *old_crd_f, - int *box_map_times_f, cudaStream_t stream); +CUDA_LIB_EXPORT void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse, float *crd_f, float *old_crd_f, + int *box_map_times_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_REFRESH_BOXMAPTIMES_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cuh index 56d1baba7b9..adc6b16ad9e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_atom_energy_impl.cuh @@ -19,9 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn, - const float *pk, const float *gamc, const float *gams, const float *pn, float *ene, - cudaStream_t stream); +CUDA_LIB_EXPORT void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, + const float *scaler_f, const int *atom_a, const int *atom_b, const int *atom_c, + const int *atom_d, const int *ipn, const float *pk, const float *gamc, + const float *gams, const float *pn, float *ene, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_energy_impl.cuh index eb8dc46e75c..f8fcb5dfe72 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_energy_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, - const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn, const float *pk, - const float *gamc, const float *gams, const float *pn, float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f, + const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, + const int *ipn, const float *pk, const float *gamc, const float *gams, + const float *pn, float *ene, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_impl.cuh index 5804e3b825b..33038fac47a 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_impl.cuh @@ -19,9 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn, - const float *pk, const float *gamc, const float *gams, const float *pn, float *frc_f, - cudaStream_t stream); +CUDA_LIB_EXPORT void DihedralForce(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, + const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, + const int *ipn, const float *pk, const float *gamc, const float *gams, + const float *pn, float *frc_f, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_with_atom_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_with_atom_energy_impl.cuh index dc9ca6ea818..82dc3e26bb6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_with_atom_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/dihedral/dihedral_force_with_atom_energy_impl.cuh @@ -19,9 +19,11 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, - const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, - const int *ipn, const float *pk, const float *gamc, const float *gams, const float *pn, - float *frc_f, float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void DihedralForceWithAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, + const float *scaler_f, const int *atom_a, const int *atom_b, + const int *atom_c, const int *atom_d, const int *ipn, const float *pk, + const float *gamc, const float *gams, const float *pn, float *frc_f, + float *ene, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh index b51a52ef1a4..5ad9f2f4609 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_direct_cf_force_with_lj_virial_direct_cf_energy_impl.cuh @@ -23,8 +23,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy( +CUDA_LIB_EXPORT void LJ_Direct_CF_Force_With_LJ_Virial_Direct_CF_Energy( const int atom_numbers, const float cutoff, const float pme_beta, const unsigned int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_lj_virial, float 
*atom_energy, diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_energy_impl.cuh index 25045a5ebed..a97a462aad8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_energy_impl.cuh @@ -19,9 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype, - const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, - int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *d_LJ_energy_atom, - cudaStream_t stream); +CUDA_LIB_EXPORT void LJEnergy(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, + const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ, + int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A, + const float *d_LJ_B, float *d_LJ_energy_atom, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_impl.cuh index c37fdcdea97..5c8462144b6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_impl.cuh @@ -19,9 +19,11 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, const int *LJtype, - const float *charge, const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, - int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, float *frc_f, cudaStream_t stream); +CUDA_LIB_EXPORT void LJForce(const int atom_numbers, const float cutoff_square, const int *uint_crd_f, + const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ, + int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A, + const float *d_LJ_B, float *frc_f, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh index 2bd59ee88e0..22c603c9642 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_force_with_pme_direct_force_impl.cuh @@ -19,10 +19,12 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, const int *uint_crd_f, - const int *LJtype, const float *charge, const float *scaler_f, float *uint_crd_with_LJ, - int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *d_LJ_A, - const float *d_LJ_B, float *frc_f, cudaStream_t stream); +CUDA_LIB_EXPORT void LJForceWithPMEDirectForce(const int atom_numbers, const float cutoff, const float pme_beta, + const int 
*uint_crd_f, const int *LJtype, const float *charge, + const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, + int *nl_atom_serial, int *nl, const float *d_LJ_A, const float *d_LJ_B, + float *frc_f, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh index 0e22abe4352..e23c5de34b6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/lj/lj_pme_direct_force_with_atom_energy_impl.cuh @@ -19,11 +19,13 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta, - const int *uint_crd_f, const int *LJtype, const float *charge, const float *scaler_f, - float *uint_crd_with_LJ, int *nl_atom_numbers, int *nl_atom_serial, int *nl, - const float *d_LJ_A, const float *d_LJ_B, float *frc_f, float *atom_energy, - cudaStream_t stream); +CUDA_LIB_EXPORT void LJDirectCFForceWithAtomEnergy(const int atom_numbers, const float cutoff, const float pme_beta, + const int *uint_crd_f, const int *LJtype, const float *charge, + const float *scaler_f, float *uint_crd_with_LJ, int *nl_atom_numbers, + int *nl_atom_serial, int *nl, const float *d_LJ_A, + const float *d_LJ_B, float *frc_f, float *atom_energy, + cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh index 616ce19b6d8..eb0f80a46ec 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_atom_energy_impl.cuh @@ -18,8 +18,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, - const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, - const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void Dihedral14CFAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, + const int *uint_crd_f, const int *LJtype, const float *charge, + const float *boxlength_f, const int *a_14, const int *b_14, + const float *cf_scale_factor, float *ene, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh index dfc31a357f0..bc5aea7af1d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_cf_energy_impl.cuh @@ -18,8 +18,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include 
"plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, - const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, - const int *b_14, const float *cf_scale_factor, float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void Dihedral14CFEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, + const int *LJtype, const float *charge, float *uint_crd_with_LJ_f, + const float *boxlength_f, const int *a_14, const int *b_14, + const float *cf_scale_factor, float *ene, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_CF_ENERGY_IMPL_H diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh index 286bc9d5ac0..a1943aefe2f 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_atom_energy_impl.cuh @@ -18,9 +18,11 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, - const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, - const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, - const float *LJ_type_B, float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void Dihedral14LJAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, + const int *uint_crd_f, const int *LJtype, const float *charge, + const float *boxlength_f, const int *a_14, const int *b_14, + const float *lj_scale_factor, const float *LJ_type_A, + const float *LJ_type_B, float *ene, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ATOM_ENERGY_IMPL_H diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh index 914d6e0a010..91018f1fac4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_and_virial_impl.cuh @@ -18,12 +18,11 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Dihedral14LJCFForceWithAtomEnergyAndVirial(const int dihedral_14_numbers, const int atom_numbers, - const int *uint_crd_f, const int *LJtype, const float *charge, - float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, - const int *b_14, const float *lj_scale_factor, - const float *cf_scale_factor, const float *LJ_type_A, - const float *LJ_type_B, float *frc_f, float *atom_energy, - float *atom_virial, cudaStream_t stream); +CUDA_LIB_EXPORT void Dihedral14LJCFForceWithAtomEnergyAndVirial( + const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, const float *charge, + float 
*uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, const int *b_14, const float *lj_scale_factor, + const float *cf_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy, + float *atom_virial, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh index ad2957b79c0..7d5560c78e0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_cf_force_with_atom_energy_impl.cuh @@ -18,11 +18,13 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, - const int *LJtype, const float *charge, float *uint_crd_with_LJ_f, - const float *boxlength_f, const int *a_14, const int *b_14, - const float *lj_scale_factor, const float *cf_scale_factor, - const float *LJ_type_A, const float *LJ_type_B, float *frc_f, float *atom_energy, - cudaStream_t stream); +CUDA_LIB_EXPORT void Dihedral14LJCFForceWithAtomEnergy(const int dihedral_14_numbers, const int atom_numbers, + const int *uint_crd_f, const int *LJtype, const float *charge, + float *uint_crd_with_LJ_f, const float *boxlength_f, + const int *a_14, const int *b_14, const float *lj_scale_factor, + const float *cf_scale_factor, const float *LJ_type_A, + const float *LJ_type_B, float *frc_f, float *atom_energy, + cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_CF_FORCE_WITH_ATOM_ENERGY_IMPL_H diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh index cd9c125c899..7202a596df3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_energy_impl.cuh @@ -18,10 +18,12 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, - const float *charge, float *uint_crd_with_LJ_f, const float *boxlength_f, const int *a_14, - const int *b_14, const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, - float *ene, cudaStream_t stream); +CUDA_LIB_EXPORT void Dihedral14LJEnergy(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, + const int *LJtype, const float *charge, float *uint_crd_with_LJ_f, + const float *boxlength_f, const int *a_14, const int *b_14, + const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, + float *ene, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_ENERGY_IMPL_H diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh 
b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh index ab67d0c076c..61849b1abc8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_impl.cuh @@ -18,9 +18,11 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, const int *LJtype, - const float *charge, const float *boxlength_f, const int *a_14, const int *b_14, - const float *lj_scale_factor, const float *LJ_type_A, const float *LJ_type_B, float *frc_f, - cudaStream_t stream); +CUDA_LIB_EXPORT void Dihedral14LJForce(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, + const int *LJtype, const float *charge, const float *boxlength_f, + const int *a_14, const int *b_14, const float *lj_scale_factor, + const float *LJ_type_A, const float *LJ_type_B, float *frc_f, + cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_IMPL_H diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh index 0b97b56b685..901aa4f56cc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nb14/dihedral_14_lj_force_with_direct_cf_impl.cuh @@ -18,9 +18,12 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, const int *uint_crd_f, - const int *LJtype, const float *charge, const float *boxlength_f, const int *a_14, - const int *b_14, const float *lj_scale_factor, const float *cf_scale_factor, - const float *LJ_type_A, const float *LJ_type_B, float *frc_f, cudaStream_t stream); +CUDA_LIB_EXPORT void Dihedral14LJForceWithDirectCF(const int dihedral_14_numbers, const int atom_numbers, + const int *uint_crd_f, const int *LJtype, const float *charge, + const float *boxlength_f, const int *a_14, const int *b_14, + const float *lj_scale_factor, const float *cf_scale_factor, + const float *LJ_type_A, const float *LJ_type_B, float *frc_f, + cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NB14_DIHEDRAL_14_LJ_FORCE_WITH_DIRECT_CF_IMPL_H diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh index a91b582c5d3..f5204e8de84 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/neighbor_list/neighbor_list_impl.cuh @@ -22,6 +22,7 @@ #define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_NEIGHBOR_LIST_IMPL_H_ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" struct VECTOR { float x; @@ -49,33 +50,33 @@ struct GRID_POINTER { int *grid_serial; }; -void 
ConstructNeighborList(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial, - NEIGHBOR_LIST *nl, cudaStream_t stream); +CUDA_LIB_EXPORT void ConstructNeighborList(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, + int *nl_atom_serial, NEIGHBOR_LIST *nl, cudaStream_t stream); -void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, - int *nl_atom_serial, cudaStream_t stream); +CUDA_LIB_EXPORT void CopyNeighborList(int atom_numbers, int max_neighbor_numbers, NEIGHBOR_LIST *nl, + int *nl_atom_numbers, int *nl_atom_serial, cudaStream_t stream); -void NeighborListRefresh(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, - int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square, - int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse, - int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd, - float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, - float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, - int *excluded_list_start, int *excluded_list, int *excluded_numbers, float half_skin_square, - int *is_need_refresh_neighbor_list, int forced_update, int forced_check, cudaStream_t stream); +CUDA_LIB_EXPORT void NeighborListRefresh( + int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, int not_first_time, float skin, + int nxy, float cutoff_square, float cutoff_with_skin_square, int *grid_N, float *box_length, + int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket, + float *crd, float *old_crd, float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, + float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list, + int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list, int forced_update, + int forced_check, cudaStream_t stream); -void ConstructNeighborListHalf(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, int *nl_atom_serial, - NEIGHBOR_LIST *nl, cudaStream_t stream); +CUDA_LIB_EXPORT void ConstructNeighborListHalf(int grid_numbers, int max_neighbor_numbers, int *nl_atom_numbers, + int *nl_atom_serial, NEIGHBOR_LIST *nl, cudaStream_t stream); -void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, cudaStream_t stream); +CUDA_LIB_EXPORT void CopyNeighborListHalf(int atom_numbers, NEIGHBOR_LIST *nl, int *nl_atom_numbers, + cudaStream_t stream); -void NeighborListUpdate(int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, - int not_first_time, float skin, int nxy, float cutoff_square, float cutoff_with_skin_square, - int *grid_N, float *box_length, int *atom_numbers_in_grid_bucket, float *grid_length_inverse, - int *atom_in_grid_serial, GRID_BUCKET *bucket, float *crd, float *old_crd, - float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, - float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, - int *excluded_list, int *excluded_numbers, float half_skin_square, - int *is_need_refresh_neighbor_list, cudaStream_t stream); +CUDA_LIB_EXPORT void NeighborListUpdate( + int grid_numbers, int atom_numbers, int *d_refresh_count, int refresh_interval, int not_first_time, float skin, + int nxy, float 
cutoff_square, float cutoff_with_skin_square, int *grid_N, float *box_length, + int *atom_numbers_in_grid_bucket, float *grid_length_inverse, int *atom_in_grid_serial, GRID_BUCKET *bucket, + float *crd, float *old_crd, float *crd_to_uint_crd_cof, float *half_crd_to_uint_crd_cof, unsigned int *uint_crd, + float *uint_dr_to_dr_cof, GRID_POINTER *gpointer, NEIGHBOR_LIST *d_nl, int *excluded_list_start, int *excluded_list, + int *excluded_numbers, float half_skin_square, int *is_need_refresh_neighbor_list, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_gradient_descent_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_gradient_descent_impl.cuh index 0455f49219a..8299fc491ca 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_gradient_descent_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_gradient_descent_impl.cuh @@ -19,7 +19,8 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" -void MDIterationGradientDescent(const int atom_numbers, float *crd, float *frc, const float learning_rate, - cudaStream_t stream); +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" +CUDA_LIB_EXPORT void MDIterationGradientDescent(const int atom_numbers, float *crd, float *frc, + const float learning_rate, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh index af5756a7fa6..61d149060b7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_impl.cuh @@ -23,8 +23,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc, - const float *inverse_mass, const float dt, cudaStream_t stream); +CUDA_LIB_EXPORT void MDIterationLeapFrog(const int atom_numbers, float *vel, float *crd, float *frc, float *acc, + const float *inverse_mass, const float dt, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_gpu_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_gpu_impl.cuh index 58bd87184c4..77e42a3f2d6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_gpu_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_gpu_impl.cuh @@ -19,10 +19,11 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" -void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt, - const float exp_gamma, int float4_numbers, float *inverse_mass, - float *sqrt_mass_inverse, float *vel, float *crd, float *frc, float *acc, - curandStatePhilox4_32_10_t *rand_state, float *rand_frc, float *output, - cudaStream_t stream); +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" 
+CUDA_LIB_EXPORT void MD_Iteration_Leap_Frog_With_LiuJian(const int atom_numbers, const float half_dt, const float dt, + const float exp_gamma, int float4_numbers, float *inverse_mass, + float *sqrt_mass_inverse, float *vel, float *crd, float *frc, + float *acc, curandStatePhilox4_32_10_t *rand_state, + float *rand_frc, float *output, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_GPU_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_with_max_vel_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_with_max_vel_impl.cuh index cf832952d7b..7872f70cfd8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_with_max_vel_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_liujian_with_max_vel_impl.cuh @@ -19,11 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" -void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Vel(const int atom_numbers, const float half_dt, const float dt, - const float exp_gamma, int float4_numbers, float *inverse_mass, - float *sqrt_mass_inverse, float *vel, float *crd, float *frc, - float *acc, curandStatePhilox4_32_10_t *rand_state, - float *rand_frc, float *output, const float max_vel, - cudaStream_t stream); +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" +CUDA_LIB_EXPORT void MD_Iteration_Leap_Frog_With_LiuJian_With_Max_Vel( + const int atom_numbers, const float half_dt, const float dt, const float exp_gamma, int float4_numbers, + float *inverse_mass, float *sqrt_mass_inverse, float *vel, float *crd, float *frc, float *acc, + curandStatePhilox4_32_10_t *rand_state, float *rand_frc, float *output, const float max_vel, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_LIUJIAN_WITH_MAX_VEL_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_with_max_vel_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_with_max_vel_impl.cuh index 4c6fa476483..3bdb3dd60dc 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_with_max_vel_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_leap_frog_with_max_vel_impl.cuh @@ -19,8 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" -void MDIterationLeapFrogWithMaxVelocity(const int atom_numbers, float *vel, float *crd, float *frc, float *acc, - const float *inverse_mass, const float dt, const float max_velocity, - cudaStream_t stream); +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" +CUDA_LIB_EXPORT void MDIterationLeapFrogWithMaxVelocity(const int atom_numbers, float *vel, float *crd, float *frc, + float *acc, const float *inverse_mass, const float dt, + const float max_velocity, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_MD_ITERATION_LEAP_FROG_WITH_MAX_VEL_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_setup_random_state_gpu_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_setup_random_state_gpu_impl.cuh index b3d268c3013..8fda94fc3a7 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_setup_random_state_gpu_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/nvtit/md_iteration_setup_random_state_gpu_impl.cuh @@ -18,6 +18,8 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" -void MD_Iteration_Setup_Random_State(int float4_numbers, curandStatePhilox4_32_10_t *rand_state, int seed, - cudaStream_t stream); +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" + +CUDA_LIB_EXPORT void MD_Iteration_Setup_Random_State(int float4_numbers, curandStatePhilox4_32_10_t *rand_state, + int seed, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/fft_3d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/fft_3d_impl.cuh index bb2f6c87bab..6df893b71ea 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/fft_3d_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/fft_3d_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h" #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" template -void FFT3D(int Nfft, T *input_tensor, Complex *output_tensor, const cufftHandle &FFT_plan_r2c, cudaStream_t stream); +CUDA_LIB_EXPORT void FFT3D(int Nfft, T *input_tensor, Complex *output_tensor, const cufftHandle &FFT_plan_r2c, + cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/ifft_3d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/ifft_3d_impl.cuh index 68d12d08cae..0d740e97a6b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/ifft_3d_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/ifft_3d_impl.cuh @@ -19,8 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h" #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" template -void IFFT3D(int Nfft, Complex *input_tensor, T *output_tensor, const cufftHandle &FFT_plan_c2r, cudaStream_t stream); +CUDA_LIB_EXPORT void IFFT3D(int Nfft, Complex *input_tensor, T *output_tensor, const cufftHandle &FFT_plan_c2r, + cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_batched_fft_2d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_batched_fft_2d_impl.cuh index 99fca45954f..71f1aa919d5 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_batched_fft_2d_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_batched_fft_2d_impl.cuh @@ -19,9 +19,10 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h" #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" template -void PMEBatchedFFT2D(Complex *input_tensor, Complex *output_tensor, - const cufftHandle &FFT_plan_c2c, int direction, cudaStream_t stream); +CUDA_LIB_EXPORT void PMEBatchedFFT2D(Complex *input_tensor, Complex *output_tensor, + const cufftHandle &FFT_plan_c2c, int direction, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_impl.cuh 
b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_impl.cuh index f5697b779ee..24b0db6d046 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_impl.cuh @@ -18,13 +18,15 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, - float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, const int *uint_crd_f, - const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, const float *scaler_f, - const int *excluded_list_start, const int *excluded_list, const int *excluded_atom_numbers, - float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, float *d_correction_ene, - dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall, const cufftHandle &PME_plan_r2c, - const cufftHandle &PME_plan_c2r, cudaStream_t stream); +CUDA_LIB_EXPORT void PMEEnergy(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, + float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, + const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, + int *nl, const float *scaler_f, const int *excluded_list_start, const int *excluded_list, + const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, + float *d_direct_ene, float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, + int PME_Nall, const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, + cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_update_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_update_impl.cuh index 49de269ca66..3a26a7eccc4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_update_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_energy_update_impl.cuh @@ -18,14 +18,17 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz, - float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz, - const int *uint_crd_f, const float *charge, int *nl_atom_numbers, int *nl_atom_serial, int *nl, - const float *scaler_f, const int *excluded_list_start, const int *excluded_list, - const int *excluded_atom_numbers, float *d_reciprocal_ene, float *d_self_ene, float *d_direct_ene, - float *d_correction_ene, dim3 thread_PME, int PME_Nin, int PME_Nfft, int PME_Nall, - const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r, float *neutralizing_factor, - float *charge_sum, int max_neighbor_numbers, cudaStream_t stream); +CUDA_LIB_EXPORT void PMEEnergyUpdate(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, + int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, + int *pme_kxyz, const int *uint_crd_f, const float *charge, int *nl_atom_numbers, + int *nl_atom_serial, int *nl, const float *scaler_f, + const int *excluded_list_start, const int *excluded_list, + const int *excluded_atom_numbers, float 
*d_reciprocal_ene, float *d_self_ene, + float *d_direct_ene, float *d_correction_ene, dim3 thread_PME, int PME_Nin, + int PME_Nfft, int PME_Nall, const cufftHandle &PME_plan_r2c, + const cufftHandle &PME_plan_c2r, float *neutralizing_factor, float *charge_sum, + int max_neighbor_numbers, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh index 8cbbf282054..bd08b11c1d8 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_excluded_force_impl.cuh @@ -18,9 +18,11 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, const float *sacler_f, - const float *charge, const int *excluded_list_start, const int *excluded_list, - const int *excluded_atom_numbers, float *frc_f, cudaStream_t stream); +CUDA_LIB_EXPORT void PMEExcludedForce(const int atom_numbers, const float pme_beta, const int *uint_crd_f, + const float *sacler_f, const float *charge, const int *excluded_list_start, + const int *excluded_list, const int *excluded_atom_numbers, float *frc_f, + cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_fft_1d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_fft_1d_impl.cuh index f3bcccfc3d3..0ecde226b81 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_fft_1d_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_fft_1d_impl.cuh @@ -21,7 +21,7 @@ #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" template -void PMEFFT1D(int Nfft, Complex *input_tensor, Complex *output_tensor, - const cufftHandle &FFT_plan_c2c, cudaStream_t stream); +CUDA_LIB_EXPORT void PMEFFT1D(int Nfft, Complex *input_tensor, Complex *output_tensor, + const cufftHandle &FFT_plan_c2c, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_fft_2d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_fft_2d_impl.cuh index 87b10052f4a..3218a1ac46e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_fft_2d_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_fft_2d_impl.cuh @@ -21,7 +21,7 @@ #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" template -void PMEFFT2D(int Nfft, Complex *input_tensor, Complex *output_tensor, - const cufftHandle &FFT_plan_c2c, cudaStream_t stream); +CUDA_LIB_EXPORT void PMEFFT2D(int Nfft, Complex *input_tensor, Complex *output_tensor, + const cufftHandle &FFT_plan_c2c, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_ifft_1d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_ifft_1d_impl.cuh index f486472bb00..524195b0d7c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_ifft_1d_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_ifft_1d_impl.cuh @@ -21,7 +21,7 @@ #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" template -void PMEIFFT1D(int Nfft, Complex 
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_ifft_2d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_ifft_2d_impl.cuh
index c598ddbbdd6..4d3e7f44a7e 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_ifft_2d_impl.cuh
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_ifft_2d_impl.cuh
@@ -21,7 +21,7 @@
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
 template <typename T>
-void PMEIFFT2D(int Nfft, Complex<T> *input_tensor, Complex<T> *output_tensor,
-               const cufftHandle &FFT_plan_c2c, cudaStream_t stream);
+CUDA_LIB_EXPORT void PMEIFFT2D(int Nfft, Complex<T> *input_tensor, Complex<T> *output_tensor,
+                               const cufftHandle &FFT_plan_c2c, cudaStream_t stream);
 #endif
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_irfft_2d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_irfft_2d_impl.cuh
index 40b76c890ee..3610537d68d 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_irfft_2d_impl.cuh
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_irfft_2d_impl.cuh
@@ -21,7 +21,7 @@
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
 template <typename T>
-void PMEIRFFT2D(int Nfft, Complex<T> *input_tensor, T *output_tensor,
-                const cufftHandle &FFT_plan_c2r, cudaStream_t stream);
+CUDA_LIB_EXPORT void PMEIRFFT2D(int Nfft, Complex<T> *input_tensor, T *output_tensor, const cufftHandle &FFT_plan_c2r,
+                                cudaStream_t stream);
 #endif
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh
index e21cd655f6c..5faa797bdef 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_reciprocal_force_impl.cuh
@@ -24,10 +24,11 @@ struct _VECTOR {
   float y;
   float z;
 };
-void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC, int *pme_uxyz,
-                        float *pme_frxyz, float *PME_Q, float *pme_fq, int *PME_atom_near, int *pme_kxyz,
-                        const int *uint_crd_f, const float *charge, float *force, int PME_Nin, int PME_Nall,
-                        int PME_Nfft, const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r,
-                        const _VECTOR &PME_inverse_box_vector, cudaStream_t stream);
+CUDA_LIB_EXPORT void PMEReciprocalForce(int fftx, int ffty, int fftz, int atom_numbers, float beta, float *PME_BC,
+                                        int *pme_uxyz, float *pme_frxyz, float *PME_Q, float *pme_fq,
+                                        int *PME_atom_near, int *pme_kxyz, const int *uint_crd_f, const float *charge,
+                                        float *force, int PME_Nin, int PME_Nall, int PME_Nfft,
+                                        const cufftHandle &PME_plan_r2c, const cufftHandle &PME_plan_c2r,
+                                        const _VECTOR &PME_inverse_box_vector, cudaStream_t stream);
 #endif
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_rfft_2d_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_rfft_2d_impl.cuh
index 27569bedcf4..f6648bb33e2 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_rfft_2d_impl.cuh
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/pme/pme_rfft_2d_impl.cuh
@@ -19,9 +19,10 @@
 #include <cufft.h>
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
 template <typename T>
-void PMERFFT2D(int Nfft, T *input_tensor, Complex<T> *output_tensor, const cufftHandle &FFT_plan_r2c,
-               cudaStream_t stream);
+CUDA_LIB_EXPORT void PMERFFT2D(int Nfft, T *input_tensor, Complex<T> *output_tensor, const cufftHandle &FFT_plan_r2c,
+                               cudaStream_t stream);
 #endif
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_energy_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_energy_impl.cuh
index a58fca6cbfe..6402e158c1d 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_energy_impl.cuh
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_energy_impl.cuh
@@ -19,7 +19,9 @@
 #include <cuda_runtime_api.h>
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
 
-void restrainenergy(int restrain_numbers, int atom_numbers, float weight, const int *restrain_list, const float *crd_f,
-                    const float *crd_ref, const float *boxlength_f, float *ene, cudaStream_t stream);
+CUDA_LIB_EXPORT void restrainenergy(int restrain_numbers, int atom_numbers, float weight, const int *restrain_list,
+                                    const float *crd_f, const float *crd_ref, const float *boxlength_f, float *ene,
+                                    cudaStream_t stream);
 #endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_RESTRAIN_RESTRAIN_ENERGY_IMPL_H_
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_force_atom_energy_virial_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_force_atom_energy_virial_impl.cuh
index 94f3398e8d8..099bd7435a3 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_force_atom_energy_virial_impl.cuh
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_force_atom_energy_virial_impl.cuh
@@ -19,9 +19,11 @@
 #include <cuda_runtime_api.h>
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
 
-void restrainforcewithatomenergyandvirial(int restrain_numbers, int atom_numbers, const int *restrain_list,
-                                          const float *crd_f, const float *crd_ref_f, const float weight,
-                                          const float *boxlength_f, float *atom_ene, float *atom_virial, float *frc_f,
-                                          cudaStream_t stream);
+CUDA_LIB_EXPORT void restrainforcewithatomenergyandvirial(int restrain_numbers, int atom_numbers,
+                                                          const int *restrain_list, const float *crd_f,
+                                                          const float *crd_ref_f, const float weight,
+                                                          const float *boxlength_f, float *atom_ene, float *atom_virial,
+                                                          float *frc_f, cudaStream_t stream);
 #endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_RESTRAIN_RESTRAIN_FORCE_ATOM_ENERGY_VIRIAL_IMPL_H_
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_force_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_force_impl.cuh
index 1788b9c55c3..f1cd09b0298 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_force_impl.cuh
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/restrain/restrain_force_impl.cuh
@@ -19,8 +19,9 @@
 #include <cuda_runtime_api.h>
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
 
-void restrainforce(int restrain_numbers, int atom_numbers, const int *restrain_list, const int *uint_crd_f,
-                   const int *uint_crd_ref, const float factor, const float *scaler_f, float *frc_f,
-                   cudaStream_t stream);
+CUDA_LIB_EXPORT void restrainforce(int restrain_numbers, int atom_numbers, const int *restrain_list,
+                                   const int *uint_crd_f, const int *uint_crd_ref, const float factor,
+                                   const float *scaler_f, float *frc_f, cudaStream_t stream);
 #endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_RESTRAIN_RESTAIN_FORCE_IMPL_H_
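For callers, nothing changes apart from linking against the plugin library: the launchers keep their signatures and are still invoked from host code with device pointers plus the op's stream. A minimal, hypothetical host-side wrapper using the restrainenergy signature above (the wrapper name and buffer ownership are illustrative, and the header above is assumed to be included):

// Hypothetical caller; all d_* pointers are device memory owned elsewhere.
void LaunchRestrainEnergy(int restrain_numbers, int atom_numbers, float weight,
                          const int *d_restrain_list, const float *d_crd,
                          const float *d_crd_ref, const float *d_boxlength,
                          float *d_ene, cudaStream_t stream) {
  // Enqueues the kernel on `stream` and returns without synchronizing.
  restrainenergy(restrain_numbers, atom_numbers, weight, d_restrain_list, d_crd,
                 d_crd_ref, d_boxlength, d_ene, stream);
}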
"plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void restrainforce(int restrain_numbers, int atom_numbers, const int *restrain_list, const int *uint_crd_f, - const int *uint_crd_ref, const float factor, const float *scaler_f, float *frc_f, - cudaStream_t stream); +CUDA_LIB_EXPORT void restrainforce(int restrain_numbers, int atom_numbers, const int *restrain_list, + const int *uint_crd_f, const int *uint_crd_ref, const float factor, + const float *scaler_f, float *frc_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_RESTRAIN_RESTAIN_FORCE_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh index 6af297ba7a0..502e6e65823 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_cycle_impl.cuh @@ -23,10 +23,12 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Constrain_Force_Cycle(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f, - const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f, - const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs, - const float *constrain_ks, float *test_frc_f, cudaStream_t stream); +CUDA_LIB_EXPORT void Constrain_Force_Cycle(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f, + const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f, + const int *atom_i_serials, const int *atom_j_serials, + const float *constant_rs, const float *constrain_ks, float *test_frc_f, + cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh index 3c77e8d0355..bbc9064d1e9 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_cycle_with_virial_impl.cuh @@ -23,10 +23,12 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f, - const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f, - const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs, - const float *constrain_ks, float *test_frc_f, float *d_atom_virial, - cudaStream_t stream); +CUDA_LIB_EXPORT void Constrain_Force_Cycle_With_Virial(int atom_numbers, int constrain_pair_numbers, + const unsigned int *uint_crd_f, const float *scaler_f, + float *constrain_pair_f, const float *pair_dr_f, + const int *atom_i_serials, const int *atom_j_serials, + const float *constant_rs, const float *constrain_ks, + float *test_frc_f, float *d_atom_virial, cudaStream_t stream); #endif diff --git 
a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh index 05b351c053b..289455b9327 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/constrain_force_virial_impl.cuh @@ -23,26 +23,27 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void constrain_force_cycle_update(int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f, - const float *scaler_f, float *constrain_pair_f, const float *pair_dr_f, - const int *atom_i_serials, const int *atom_j_serials, const float *constant_rs, - const float *constrain_ks, float *test_frc_f, cudaStream_t stream); +CUDA_LIB_EXPORT void constrain_force_cycle_update(int atom_numbers, int constrain_pair_numbers, + const unsigned int *uint_crd_f, const float *scaler_f, + float *constrain_pair_f, const float *pair_dr_f, + const int *atom_i_serials, const int *atom_j_serials, + const float *constant_rs, const float *constrain_ks, + float *test_frc_f, cudaStream_t stream); -void constrain_force_cycle_with_virial_update(int atom_numbers, int constrain_pair_numbers, - const unsigned int *uint_crd_f, const float *scaler_f, - float *constrain_pair_f, const float *pair_dr_f, - const int *atom_i_serials, const int *atom_j_serials, - const float *constant_rs, const float *constrain_ks, float *test_frc_f, - float *d_atom_virial, cudaStream_t stream); +CUDA_LIB_EXPORT void constrain_force_cycle_with_virial_update( + int atom_numbers, int constrain_pair_numbers, const unsigned int *uint_crd_f, const float *scaler_f, + float *constrain_pair_f, const float *pair_dr_f, const int *atom_i_serials, const int *atom_j_serials, + const float *constant_rs, const float *constrain_ks, float *test_frc_f, float *d_atom_virial, cudaStream_t stream); -void refresh_uint_crd_update(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f, - const float *quarter_crd_to_uint_crd_cof_f, float *test_frc_f, const float *mass_inverse, - unsigned int *uint_crd_f, cudaStream_t stream); +CUDA_LIB_EXPORT void refresh_uint_crd_update(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f, + const float *quarter_crd_to_uint_crd_cof_f, float *test_frc_f, + const float *mass_inverse, unsigned int *uint_crd_f, cudaStream_t stream); -void set_zero_force_with_virial(int atom_numbers, int constrain_pair_numbers, float *test_frc_f, float *d_atom_virial, - cudaStream_t stream); +CUDA_LIB_EXPORT void set_zero_force_with_virial(int atom_numbers, int constrain_pair_numbers, float *test_frc_f, + float *d_atom_virial, cudaStream_t stream); -void set_zero(int numbers, float *x, cudaStream_t stream); +CUDA_LIB_EXPORT void set_zero(int numbers, float *x, cudaStream_t stream); #endif diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/last_crd_to_dr_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/last_crd_to_dr_impl.cuh index 2d71397680a..becb141bfe0 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/last_crd_to_dr_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/last_crd_to_dr_impl.cuh @@ -19,9 +19,11 @@ 
#include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void lastcrdtodr(int constrain_pair_numbers, const float *atom_crd_f, const float *quarter_crd_to_uint_crd_cof_f, - const float *uint_dr_to_dr_f, float *constrain_pair_f, const int *atom_i_serials, - const int *atom_j_serials, const float *constant_rs, const float *constrain_ks, float *pair_dr_f, - cudaStream_t stream); +CUDA_LIB_EXPORT void lastcrdtodr(int constrain_pair_numbers, const float *atom_crd_f, + const float *quarter_crd_to_uint_crd_cof_f, const float *uint_dr_to_dr_f, + float *constrain_pair_f, const int *atom_i_serials, const int *atom_j_serials, + const float *constant_rs, const float *constrain_ks, float *pair_dr_f, + cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_LAST_CRD_TO_DR_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/refresh_crd_vel_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/refresh_crd_vel_impl.cuh index a3c3d40f452..f3c27147830 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/refresh_crd_vel_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/refresh_crd_vel_impl.cuh @@ -19,7 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void refreshcrdvel(int atom_numbers, float dt_inverse, float dt, float exp_gamma, float half_exp_gamma_plus_half, - float *test_frc_f, float *mass_inverse, float *crd_f, float *vel_f, cudaStream_t stream); +CUDA_LIB_EXPORT void refreshcrdvel(int atom_numbers, float dt_inverse, float dt, float exp_gamma, + float half_exp_gamma_plus_half, float *test_frc_f, float *mass_inverse, float *crd_f, + float *vel_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_REFRESH_CRD_VEL_IMPL_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/refresh_uint_crd_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/refresh_uint_crd_impl.cuh index ecdb5c7ec72..1a6e3df7d9c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/refresh_uint_crd_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/sponge/simple_constrain/refresh_uint_crd_impl.cuh @@ -19,8 +19,9 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" -void refreshuintcrd(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f, - const float *quarter_crd_to_uint_crd_cof_f, const float *test_frc_f, const float *mass_inverse, - unsigned int *uint_crd_f, cudaStream_t stream); +CUDA_LIB_EXPORT void refreshuintcrd(int atom_numbers, float half_exp_gamma_plus_half, const float *crd_f, + const float *quarter_crd_to_uint_crd_cof_f, const float *test_frc_f, + const float *mass_inverse, unsigned int *uint_crd_f, cudaStream_t stream); #endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_SIMPLE_CONSTRAIN_REFRESH_UINT_CRD_IMPL_H_ diff --git a/mindspore/ccsrc/profiler/device/profiling.h b/mindspore/ccsrc/profiler/device/profiling.h index 209b21ef12d..3a4ddfc2c23 100644 --- a/mindspore/ccsrc/profiler/device/profiling.h +++ b/mindspore/ccsrc/profiler/device/profiling.h @@ -27,6 
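All of these launchers take an explicit cudaStream_t and enqueue work asynchronously, so a caller that reads a result on the host must synchronize the same stream itself. A small sketch using set_zero from constrain_force_virial_impl.cuh above; the buffer handling around it is illustrative only.

#include <cuda_runtime_api.h>
// Assumes constrain_force_virial_impl.cuh (declaring set_zero) is included.

void ZeroAndFetch(float *d_frc, float *h_frc, int numbers, cudaStream_t stream) {
  set_zero(numbers, d_frc, stream);  // asynchronous: only enqueued here
  // Copy back on the same stream, then block until both operations finish.
  cudaMemcpyAsync(h_frc, d_frc, numbers * sizeof(float), cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);
}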
diff --git a/mindspore/ccsrc/profiler/device/profiling.h b/mindspore/ccsrc/profiler/device/profiling.h
index 209b21ef12d..3a4ddfc2c23 100644
--- a/mindspore/ccsrc/profiler/device/profiling.h
+++ b/mindspore/ccsrc/profiler/device/profiling.h
@@ -27,6 +27,7 @@
 #include <memory>
 #include <string>
 #include "utils/hash_map.h"
+#include "include/backend/visible.h"
 
 namespace mindspore {
 namespace profiler {
@@ -122,7 +123,7 @@ class Profiler {
   std::string profiling_options_;
 
  private:
-  inline static HashMap<std::string, std::shared_ptr<Profiler>> instance_map_ = {};
+  BACKEND_EXPORT inline static std::map<std::string, std::shared_ptr<Profiler>> instance_map_ = {};
 };
 }  // namespace profiler
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/ps/util.cc b/mindspore/ccsrc/ps/util.cc
index b231efd4088..905af140306 100644
--- a/mindspore/ccsrc/ps/util.cc
+++ b/mindspore/ccsrc/ps/util.cc
@@ -24,26 +24,28 @@
 namespace mindspore {
 namespace ps {
-mindspore::HashMap<std::string, int> Util::optimizer_to_ids{
+namespace {
+static mindspore::HashMap<std::string, int> optimizer_to_ids = {
   {kApplyMomentum, 0},
   {kSparseAdam, 1},
   {kSparseLazyAdam, 2},
   {kSparseFtrl, 3},
 };
 
-mindspore::HashMap<int, std::string> Util::id_to_optimizers{
+static mindspore::HashMap<int, std::string> id_to_optimizers = {
   {0, kApplyMomentum},
   {1, kSparseAdam},
   {2, kSparseLazyAdam},
   {3, kSparseFtrl},
 };
 
-mindspore::HashMap<int, std::string> Util::id_to_optimizer_nodes{
+static mindspore::HashMap<int, std::string> id_to_optimizer_nodes = {
   {0, kApplyMomentumOp},
   {1, kSparseAdamOp},
   {2, kSparseLazyAdamOp},
   {3, kSparseFtrlOp},
 };
+}  // namespace
 
 bool Util::IsRoleOfPServer() { return PSContext::instance()->is_server(); }
diff --git a/mindspore/ccsrc/ps/util.h b/mindspore/ccsrc/ps/util.h
index ad39c6db670..3508b3487ad 100644
--- a/mindspore/ccsrc/ps/util.h
+++ b/mindspore/ccsrc/ps/util.h
@@ -67,11 +67,6 @@ class BACKEND_EXPORT Util {
   static void DoFusion(const FuncGraphPtr &func_graph, const std::string &cnode_name,
                        const std::string &fused_cnode_name);
   static kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const std::vector<AnfNodePtr> &node_list);
-
-  static mindspore::HashMap<std::string, int> optimizer_to_ids;
-  static mindspore::HashMap<int, std::string> id_to_optimizers;
-  static mindspore::HashMap<int, std::string> id_to_optimizer_nodes;
-
-  static int64_t rank_id_;
 };
 }  // namespace ps
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/device/distribution/collective_fake_init.cc b/mindspore/ccsrc/runtime/collective/collective_fake_init.cc
similarity index 95%
rename from mindspore/ccsrc/plugin/device/gpu/hal/device/distribution/collective_fake_init.cc
rename to mindspore/ccsrc/runtime/collective/collective_fake_init.cc
index 6bc3cd5e513..b639bf3e505 100644
--- a/mindspore/ccsrc/plugin/device/gpu/hal/device/distribution/collective_fake_init.cc
+++ b/mindspore/ccsrc/runtime/collective/collective_fake_init.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
-#include "plugin/device/gpu/hal/device/distribution/collective_fake_init.h"
+#include "runtime/collective/collective_fake_init.h"
 #include "utils/log_adapter.h"
 
 namespace mindspore {
diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/device/distribution/collective_fake_init.h b/mindspore/ccsrc/runtime/collective/collective_fake_init.h
similarity index 100%
rename from mindspore/ccsrc/plugin/device/gpu/hal/device/distribution/collective_fake_init.h
rename to mindspore/ccsrc/runtime/collective/collective_fake_init.h
diff --git a/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.cc b/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.cc
index 3ca7182f95e..d78342c6626 100644
--- a/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.cc
+++ b/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.cc
@@ -252,8 +252,7 @@ bool IsNeedPadding(const std::string &format, size_t shape_size) {
   if (shape_size == 0) {
     return false;
   }
-  if (format == kOpFormat_DEFAULT || format == kOpFormat_NCHW ||
-      kNoPaddingFormatSet.find(format) != kNoPaddingFormatSet.end()) {
+  if (format == kOpFormat_DEFAULT || format == kOpFormat_NCHW || IsOneOfNoPaddingFormat(format)) {
     return false;
   } else if (shape_size < kDim4) {
     return true;
@@ -494,12 +493,12 @@ ShapeVector DeviceShapeTransfer::TransCore(const ShapeVector &shape, const std::
     return NDRNNBiasDeviceShape(shape, type, input_hidden_size[1]);
   }
   auto temp_shape = shape;
-  if (kNoPaddingFormatSet.find(format) == kNoPaddingFormatSet.end() && format != kOpFormat_FRACTAL_ZN_LSTM &&
-      shape.size() < kDim4 && k3DFormatSet.find(format) == k3DFormatSet.end()) {
+  if (!IsOneOfNoPaddingFormat(format) && format != kOpFormat_FRACTAL_ZN_LSTM && shape.size() < kDim4 &&
+      !IsOneOf3DFormat(format)) {
     MS_LOG(WARNING) << "Origin shape size is less than 4, should be Padding shape by Default firstly";
     temp_shape = PaddingShapeTo4dDefault(shape);
   }
-  if (shape.size() != kDim5 && k3DFormatSet.find(format) != k3DFormatSet.end()) {
+  if (shape.size() != kDim5 && IsOneOf3DFormat(format)) {
     temp_shape = PaddingShapeTo5dDefault(shape);
   }
   auto iter = device_shape_map.find(format);
@@ -1799,11 +1798,11 @@ RangePair ShapeRangeTransfer::GetRealRange(const RangePair &ori_range, const std::
     return FRAC_NZRange(ori_range, type);
   }
   auto temp_range = ori_range;
-  if (ori_range.size() < kDim4 && k3DFormatSet.find(format) == k3DFormatSet.end()) {
+  if (ori_range.size() < kDim4 && !IsOneOf3DFormat(format)) {
     MS_LOG(DEBUG) << "A special format:" << format << " with a range size less than 4, so padding the range firstly";
     temp_range = PaddingRangeTo4D(ori_range, padding_str);
   }
-  if (ori_range.size() < kDim5 && k3DFormatSet.find(format) != k3DFormatSet.end()) {
+  if (ori_range.size() < kDim5 && IsOneOf3DFormat(format)) {
     MS_LOG(DEBUG) << "A special format:" << format << " with a range size less than 5, so padding the range firstly";
     temp_range = PaddingRangeTo5D(ori_range, padding_str);
   }
diff --git a/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.h b/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.h
index 2a2f443ebe1..c912441f671 100644
--- a/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.h
+++ b/mindspore/ccsrc/runtime/device/ms_device_shape_transfer.h
@@ -443,7 +443,7 @@ std::vector<T> PaddingShape(const std::vector<T> &shape, const std::string &form
                             << ", detail info: " << node->DebugString();
   }
   std::vector<T> host_shape;
-  if (k3DFormatSet.find(format) != k3DFormatSet.end()) {
+  if (IsOneOf3DFormat(format)) {
     if (shape.size() >= kDim5) {
       return shape;
     }
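The profiling.h hunk above does two things at once: it tags the Profiler registry with BACKEND_EXPORT so that every shared object resolves to the same instance_map_, and it swaps the internal HashMap alias for std::map, presumably so the exported member's type does not leak a third-party hash-map implementation into the library ABI. Reduced to a sketch, with the macro spelling an assumption rather than a copy of include/backend/visible.h:

#include <map>
#include <memory>
#include <string>

// Assumed spelling; the real macro comes from include/backend/visible.h.
#define BACKEND_EXPORT __attribute__((visibility("default")))

class Profiler {
 public:
  virtual ~Profiler() = default;

 private:
  // C++17 inline static member: one definition program-wide, exported so
  // profiler registration done in one backend plugin is seen by the others.
  BACKEND_EXPORT inline static std::map<std::string, std::shared_ptr<Profiler>> instance_map_ = {};
};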
@@ -466,7 +466,7 @@ std::vector<size_t> StringToAxisVector(const std::vector<T> &shape, const std::stri
 }
   std::vector<size_t> padding_axis;
-  if (k3DFormatSet.find(format) != k3DFormatSet.end()) {
+  if (IsOneOf3DFormat(format)) {
     if (shape.size() >= kDim5) {
       return padding_axis;
     }
diff --git a/mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc b/mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc
index dd3ffa78ad6..eb26de0b234 100644
--- a/mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc
+++ b/mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc
@@ -481,8 +481,7 @@ std::set<AnfNodePtr> FetchNopNodeNotSupportEliminate(const KernelGraph *const grap
     // kernel not support multi-thread execute will be inited in launch kernel, so its input cannot be eliminated.
-    if (kOpNotSupportMultiThreadExecList.find(common::AnfAlgo::GetCNodeName(cnode)) !=
-          kOpNotSupportMultiThreadExecList.end() ||
+    if (IsOneOfNotSupportMultiThreadExec(common::AnfAlgo::GetCNodeName(cnode)) ||
         (kCPUOpNoEliminateList.find(common::AnfAlgo::GetCNodeName(cnode)) != kCPUOpNoEliminateList.end())) {
       const auto &inputs = cnode->inputs();
       for (const auto &input : inputs) {
diff --git a/mindspore/ccsrc/runtime/pynative/run_op_helper.cc b/mindspore/ccsrc/runtime/pynative/run_op_helper.cc
index 81e4b1e9aa8..a777339a340 100644
--- a/mindspore/ccsrc/runtime/pynative/run_op_helper.cc
+++ b/mindspore/ccsrc/runtime/pynative/run_op_helper.cc
@@ -503,7 +503,7 @@ void ReleaseKernelResource(const KernelGraphPtr &graph) {
   const auto &kernels = graph->execution_order();
   for (const auto &kernel : kernels) {
     MS_EXCEPTION_IF_NULL(kernel);
-    if (kOpCacheBlackList.find(common::AnfAlgo::GetCNodeName(kernel)) != kOpCacheBlackList.end()) {
+    if (IsOneOfCacheBlackList(common::AnfAlgo::GetCNodeName(kernel))) {
       auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
       if (kernel_mod) {
         kernel_mod->ReleaseResource();
diff --git a/mindspore/ccsrc/transform/graph_ir/df_graph_manager.h b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.h
index 9f3b0134fc8..9165fc4266d 100644
--- a/mindspore/ccsrc/transform/graph_ir/df_graph_manager.h
+++ b/mindspore/ccsrc/transform/graph_ir/df_graph_manager.h
@@ -24,12 +24,13 @@
 #include
 #include "include/transform/graph_ir/types.h"
 #include "ir/anf.h"
+#include "include/backend/visible.h"
 
 namespace mindspore {
 namespace transform {
 class GraphRunner;
 
-class DfGraphManager {
+class BACKEND_EXPORT DfGraphManager {
  public:
   ~DfGraphManager();
   void ClearGraph() noexcept;
diff --git a/mindspore/ccsrc/utils/anfalgo.cc b/mindspore/ccsrc/utils/anfalgo.cc
index b84a55024a2..13321809bf5 100644
--- a/mindspore/ccsrc/utils/anfalgo.cc
+++ b/mindspore/ccsrc/utils/anfalgo.cc
@@ -815,7 +815,7 @@ bool AnfAlgo::IsUpdateParameterKernel(const CNodePtr &node) {
   if (HasNodeAttr(kAttrAsync, node) && GetNodeAttr<bool>(node, kAttrAsync)) {
     return false;
   }
-  if (kOptOperatorSet.find(node_name) == kOptOperatorSet.end() && node_name.find("Assign") == string::npos) {
+  if (!IsOneOfOperator(node_name) && node_name.find("Assign") == string::npos) {
     return false;
   }
   return true;
@@ -1031,7 +1031,7 @@ void FindDelayExecPosition(const std::vector<CNodePtr> &nodes, size_t current_in
   auto &child = nodes[j];
   auto child_name = AnfAlgo::GetCNodeName(child);
   if (child_name == kAssignAddOpName || child_name == kAssignSubOpName || child_name == kAssignOpName ||
-      kOptOperatorSet.find(child_name) != kOptOperatorSet.end()) {
+      IsOneOfOperator(child_name)) {
     return;
   }
@@ -1112,7 +1112,7 @@ void AnfAlgo::ReorderPosteriorExecList(NotNull<std::vector<CNodePtr> *> node_lis
   for (const auto &node : *node_list) {
     MS_EXCEPTION_IF_NULL(node);
-    if (kPosteriorOperatorSet.find(AnfAlgo::GetCNodeName(node)) != kPosteriorOperatorSet.end()) {
+    if (IsOneOfPosteriorOperator(AnfAlgo::GetCNodeName(node))) {
       posterior_node_list.emplace_back(node);
     } else {
       ordinary_node_list.emplace_back(node);
diff --git a/mindspore/ccsrc/utils/utils.cc b/mindspore/ccsrc/utils/utils.cc
new file mode 100644
index 00000000000..2b368430a76
--- /dev/null
+++ b/mindspore/ccsrc/utils/utils.cc
@@ -0,0 +1,214 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "include/common/utils/utils.h"
+#include <set>
+#include <string>
+namespace mindspore {
+bool IsOneOfPosteriorOperator(const std::string &name) {
+  const std::set<std::string> kPosteriorOperatorSet = {kPullOpName};
+
+  auto iter = kPosteriorOperatorSet.find(name);
+  return iter != kPosteriorOperatorSet.end();
+}
+
+bool IsOneOfCacheBlackList(const std::string &name) {
+  const std::set<std::string> kOpCacheBlackList = {kUniformCandidateSamplerOpName, kInitDatasetQueueOpName,
+                                                   kGetNextOpName};
+
+  auto iter = kOpCacheBlackList.find(name);
+  return iter != kOpCacheBlackList.end();
+}
+
+bool IsOneOfNotSupportMultiThreadExec(const std::string &name) {
+  const std::set<std::string> kOpNotSupportMultiThreadExecList = {kAvgPoolOpName, kAvgPoolGradOpName, kMaxPoolOpName,
+                                                                  kBatchNorm, kBatchNormGradOpName};
+
+  auto iter = kOpNotSupportMultiThreadExecList.find(name);
+  return iter != kOpNotSupportMultiThreadExecList.end();
+}
+
+bool IsOneOf3DFormat(const std::string &format) {
+  const std::set<std::string> k3DFormatSet = {kOpFormat_NCDHW, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D,
+                                              kOpFormat_NDHWC, kOpFormat_DHWCN, kOpFormat_DHWNC};
+
+  auto iter = k3DFormatSet.find(format);
+  return iter != k3DFormatSet.end();
+}
+
+bool IsOneOfNoPaddingFormat(const std::string &format) {
+  const std::set<std::string> kNoPaddingFormatSet = {kOpFormat_ChannelLast, kOpFormat_FRAC_NZ, kOpFormat_FRACTAL_ZN_RNN,
+                                                     kOpFormat_ND_RNN_BIAS};
+
+  auto iter = kNoPaddingFormatSet.find(format);
+  return iter != kNoPaddingFormatSet.end();
+}
+
+bool IsOneOfDynamicShapeConstInputToAttr(const std::string &name) {
+  const std::set<std::string> DynamicShapeConstInputToAttr = {
+    kCastOpName, kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName,
+    kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName, kTransposeOpName};
+
+  auto iter = DynamicShapeConstInputToAttr.find(name);
+  return iter != DynamicShapeConstInputToAttr.end();
+}
+
+bool IsOneOfDynamicShapeConstInputToAttrCPU(const std::string &name) {
+  const std::set<std::string> DynamicShapeConstInputToAttrCPU = {
+    kCastOpName, kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName,
+    kReduceAllOpName, kReduceAnyOpName, kConcatOpName, kReduceSumOpName, kTransposeOpName};
+
+  auto iter = DynamicShapeConstInputToAttrCPU.find(name);
+  return iter != DynamicShapeConstInputToAttrCPU.end();
+}
+
+bool IsOneOfDynamicShapeConstInputToAttrGPU(const std::string &name) {
+  const std::set<std::string> DynamicShapeConstInputToAttrGPU = {
+    kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName,
+    kReduceSumOpName, kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName,
+    kReduceAnyOpName, kConcatOpName, kScatterNdOpName, kGatherV2OpName, kAvgPool3DGradOpName};
+
+  auto iter = DynamicShapeConstInputToAttrGPU.find(name);
+  return iter != DynamicShapeConstInputToAttrGPU.end();
+}
+
+bool IsOneOfCustomAkgType(const std::string &name) {
+  const std::set<std::string> kCustomTypeAkg = {"ir_builder", "tvm_compute", "hybrid"};
+
+  auto iter = kCustomTypeAkg.find(name);
+  return iter != kCustomTypeAkg.end();
+}
+
+bool IsOneOfOperator(const std::string &name) {
+  const std::set<std::string> kOptOperatorSet = {kMomentumOpName,
+                                                 kApplyMomentumOpName,
+                                                 kApplyAdadeltaOpName,
+                                                 kApplyAdagradOpName,
+                                                 kApplyAdagradDAName,
+                                                 kApplyAdamOpName,
+                                                 kApplyAdaMaxOpName,
+                                                 kApplyAddSignOpName,
+                                                 kApplyCenteredRMSPOpName,
+                                                 kApplyFtrlOpName,
+                                                 kApplyFtrlV2OpName,
+                                                 kApplyGradientDescentOpName,
+                                                 kApplyPowerSignOpName,
+                                                 kApplyProximalAdagradOpName,
+                                                 kApplyProximalGradientDescentOpName,
+                                                 kApplyRMSPropOpName,
+                                                 kAdamApplyOneWithDecayOpName,
+                                                 kAdamApplyOneWithDecayAssignOpName,
+                                                 kFusedAdamWeightDecayName,
+                                                 kAdamWeightDecayName,
+                                                 kFusedCastAdamWeightDecayName,
+                                                 kFusedAdamName,
+                                                 kFusedAdaFactorName,
+                                                 kFusedAdaFactorWithGlobalNormName,
+                                                 kFusedSparseAdamName,
+                                                 kFusedMulApplyMomentumOpName,
+                                                 kFusedWeightScaleApplyMomentum,
+                                                 kFusedScaleApplyMomentum,
+                                                 kApplyCenteredRMSPropOpName,
+                                                 kFusedSparseFtrlName,
+                                                 kFusedSparseProximalAdagradName,
+                                                 kFusedSparseLazyAdamName,
+                                                 kSparseApplyFtrlName,
+                                                 kSparseApplyFtrlV2Name,
+                                                 kSGDName,
+                                                 kLARSUpdateName,
+                                                 kCombineMomentumWeightOpName,
+                                                 kCombineMomentumOpName,
+                                                 kScatterAddOpName,
+                                                 kScatterUpdateOpName,
+                                                 kSparseApplyProximalAdagradOpName};
+
+  auto iter = kOptOperatorSet.find(name);
+  return iter != kOptOperatorSet.end();
+}
+
+bool IsOneOfComputeDepend(const std::string &name) {
+  const std::set<std::string> kComputeDepend = {kUniqueOpName,
+                                                kUniqueConsecutiveOpName,
+                                                kComputeAccidentalHitsOpName,
+                                                kSubAndFilterOpName,
+                                                kPadAndShiftOpName,
+                                                kCTCGreedyDecoderOpName,
+                                                kMaskedSelectOpName,
+                                                kDynamicStitchOpName,
+                                                kGetNextOpName,
+                                                kListDiffOpName,
+                                                kNonMaxSuppressionV3OpName,
+                                                kNonMaxSuppressionWithOverlapsOpName,
+                                                kCoalesceOpName,
+                                                kTruncatedNormal,
+                                                kNonDeterministicInts,
+                                                kFractionalAvgPoolGradOpName,
+                                                kDenseToDenseSetOperation,
+                                                kSegmentMaxOpName,
+                                                kCSRSparseMatrixToSparseTensorOpName,
+                                                kSegmentMinOpName,
+                                                kLuUnpackOpName,
+                                                kSegmentSumOpName,
+                                                kResizeBicubicOpName,
+                                                kResizeAreaOpName,
+                                                kSegmentMeanOpName,
+                                                kSegmentProdOpName,
+                                                kNonZeroOpName,
+                                                kSparseSparseMinimumOpName,
+                                                kRpcRecvOpName,
+                                                kAdaptiveMaxPool3DGradOpName};
+
+  auto iter = kComputeDepend.find(name);
+  return iter != kComputeDepend.end();
+}
+
+bool IsOneOfHWSpecialFormat(const std::string &format) {
+  const std::set<std::string> kHWSpecialFormatSet = {
+    kOpFormat_FRACTAL_Z_3D, kOpFormat_NC1KHKWHWC0, kOpFormat_NC1HWC0, kOpFormat_FRAC_NZ,
+    kOpFormat_C1HWNCoC0, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_FRACTAL_ZN_LSTM,
+    kOpFormat_FRACTAL_ZN_RNN, kOpFormat_NDC1HWC0, kOpFormat_FRAC_Z};
+
+  auto iter = kHWSpecialFormatSet.find(format);
+  return iter != kHWSpecialFormatSet.end();
+}
+
+bool IsOneOfFormat(const std::string &format) {
+  const std::set<std::string> kOpFormatList = {kOpFormat_DEFAULT,
+                                               kOpFormat_NC1KHKWHWC0,
+                                               kOpFormat_ND,
+                                               kOpFormat_NCHW,
+                                               kOpFormat_NHWC,
+                                               kOpFormat_HWCN,
+                                               kOpFormat_NC1HWC0,
+                                               kOpFormat_FRAC_Z,
+                                               kOpFormat_C1HWNCoC0,
+                                               kOpFormat_FRAC_NZ,
+                                               kOpFormat_NC1HWC0_C04,
+                                               kOpFormat_FRACTAL_Z_C04,
+                                               kOpFormat_NDHWC,
+                                               kOpFormat_FRACTAL_ZN_LSTM,
+                                               kOpFormat_FRACTAL_ZN_RNN,
+                                               kOpFormat_ND_RNN_BIAS,
+                                               kOpFormat_NDC1HWC0,
+                                               kOpFormat_NCDHW,
+                                               kOpFormat_FRACTAL_Z_3D,
+                                               kOpFormat_DHWNC,
+                                               kOpFormat_DHWCN};
+
+  auto iter = kOpFormatList.find(format);
+  return iter != kOpFormatList.end();
+}
+}  // namespace mindspore
diff --git a/mindspore/lite/src/extendrt/CMakeLists.txt b/mindspore/lite/src/extendrt/CMakeLists.txt
index d6c0cbb06e4..53b6f145000 100644
--- a/mindspore/lite/src/extendrt/CMakeLists.txt
+++ b/mindspore/lite/src/extendrt/CMakeLists.txt
@@ -88,6 +88,7 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
     set(ANF_ALG_SRC ${ANF_ALG_SRC}
         ${CCSRC_DIR}/utils/anfalgo.cc
+        ${CCSRC_DIR}/utils/utils.cc
        ${CCSRC_DIR}/utils/parallel_context.cc
        ${CCSRC_DIR}/utils/convert_utils.cc)
     add_library(mindspore-infer-anfalgo OBJECT ${ANF_ALG_SRC})
diff --git a/mindspore/lite/src/extendrt/utils/kernel_build_utils.cc b/mindspore/lite/src/extendrt/utils/kernel_build_utils.cc
index c5e97446b3b..7537a7405ae 100644
--- a/mindspore/lite/src/extendrt/utils/kernel_build_utils.cc
+++ b/mindspore/lite/src/extendrt/utils/kernel_build_utils.cc
@@ -405,7 +405,7 @@ std::pair SetKernelInfoWithMsg(const CNodePtr &kerne
   const std::string &op_name = common::AnfAlgo::GetCNodeName(kernel_node);
   if (IsPrimitiveCNode(kernel_node, prim::kPrimCustom)) {
     auto tp = common::AnfAlgo::GetNodeAttr<std::string>(kernel_node, kAttrFuncType);
-    if (kCustomTypeAkg.find(tp) != kCustomTypeAkg.end()) {
+    if (IsOneOfCustomAkgType(tp)) {
       UpdateCustomKernelBuildInfo(kernel_node, true);
       return {};
     }
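A note on the new utils.cc helpers: each predicate builds its std::set on every call. That keeps include/common/utils/utils.h dependency-free and the lookup trivially safe to use from any thread, but call sites on hot paths pay a construction cost that the old global kOptOperatorSet lookups did not. If that ever shows up in profiles, a function-local static is the usual variant, sketched below as an editor's suggestion rather than code from this change; the "Pull" literal stands in for kPullOpName.

#include <set>
#include <string>

bool IsOneOfPosteriorOperator(const std::string &name) {
  // Constructed once, on first call, under the thread-safe C++11 static guard.
  static const std::set<std::string> kPosteriorOperatorSet = {"Pull"};
  return kPosteriorOperatorSet.count(name) > 0;
}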