plugin compiling independent
This commit is contained in:
parent
11cee9c0fa
commit
95b5c5b23c
|
@ -95,6 +95,22 @@ install(
|
|||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
if(ENABLE_D)
|
||||
install(
|
||||
TARGETS mindspore_ascend
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
install(
|
||||
TARGETS mindspore_gpu
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
|
||||
if(USE_GLOG)
|
||||
install(FILES ${glog_LIBPATH}/libmindspore_glog.so.0.4.0
|
||||
DESTINATION ${INSTALL_LIB_DIR} RENAME libmindspore_glog.so.0 COMPONENT mindspore)
|
||||
|
|
|
@ -32,6 +32,22 @@ install(
|
|||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
if(ENABLE_D)
|
||||
install(
|
||||
TARGETS mindspore_ascend
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
install(
|
||||
TARGETS mindspore_gpu
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif()
|
||||
|
||||
if(USE_GLOG)
|
||||
file(GLOB_RECURSE GLOG_LIB_LIST ${glog_LIBPATH}/libmindspore_glog*)
|
||||
install(
|
||||
|
|
|
@ -14,9 +14,6 @@ set(FBS_FILES
|
|||
ms_build_flatbuffers(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}../../schema generated_fbs_files ${SERVER_FLATBUFFER_OUTPUT})
|
||||
|
||||
if(ENABLE_D OR ENABLE_ACL)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
set(MINDSPORE_RPATH
|
||||
|
@ -24,9 +21,77 @@ if(ENABLE_D OR ENABLE_ACL)
|
|||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
endif()
|
||||
|
||||
if(ENABLE_D)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/ascend/kernel/aicpu/aicpu_ops)
|
||||
add_subdirectory(plugin/device/ascend/kernel/aicpu/aicpu_ops)
|
||||
if(ENABLE_GPU)
|
||||
find_package(CUDA REQUIRED)
|
||||
find_package(Threads)
|
||||
if(${CUDA_VERSION} VERSION_LESS ${MS_REQUIRE_CUDA_VERSION})
|
||||
message(FATAL_ERROR "The minimum CUDA version ${MS_REQUIRE_CUDA_VERSION} is required, \
|
||||
but only CUDA ${CUDA_VERSION} found.")
|
||||
endif()
|
||||
enable_language(CUDA)
|
||||
if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "")
|
||||
if(DEFINED ENV{CUDA_HOME} AND NOT $ENV{CUDA_HOME} STREQUAL "")
|
||||
set(CUDA_PATH $ENV{CUDA_HOME})
|
||||
else()
|
||||
set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(DEFINED ENV{CUDNN_HOME} AND NOT $ENV{CUDNN_HOME} STREQUAL "")
|
||||
set(CUDNN_INCLUDE_DIR $ENV{CUDNN_HOME}/include)
|
||||
set(CUDNN_LIBRARY_DIR $ENV{CUDNN_HOME}/lib64)
|
||||
find_path(CUDNN_INCLUDE_PATH cudnn.h HINTS ${CUDNN_INCLUDE_DIR} NO_DEFAULT_PATH)
|
||||
find_library(CUDNN_LIBRARY_PATH "cudnn" HINTS ${CUDNN_LIBRARY_DIR} NO_DEFAULT_PATH)
|
||||
find_library(CUBLAS_LIBRARY_PATH "cublas" HINTS ${CUDNN_LIBRARY_DIR})
|
||||
if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
|
||||
message(FATAL_ERROR "Failed to find cudnn header file, please set environment variable CUDNN_HOME to \
|
||||
cudnn installation position.")
|
||||
endif()
|
||||
if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
|
||||
message(FATAL_ERROR "Failed to find cudnn library file, please set environment variable CUDNN_HOME to \
|
||||
cudnn installation position.")
|
||||
endif()
|
||||
else()
|
||||
list(APPEND CMAKE_PREFIX_PATH ${CUDA_TOOLKIT_ROOT_DIR})
|
||||
find_path(CUDNN_INCLUDE_PATH cudnn.h PATH_SUFFIXES cuda/inclulde include cuda)
|
||||
find_library(CUDNN_LIBRARY_PATH "cudnn" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu)
|
||||
find_library(CUBLAS_LIBRARY_PATH "cublas" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu)
|
||||
if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
|
||||
message(FATAL_ERROR "Failed to find cudnn header file, if cudnn library is not installed, please put \
|
||||
cudnn header file in cuda include path or user include path(eg. /usr/local/cuda/include; \
|
||||
/usr/local/include; /usr/include), if cudnn library is installed in other position, please \
|
||||
set environment variable CUDNN_HOME to cudnn installation position, there should be cudnn.h \
|
||||
in {CUDNN_HOME}/include.")
|
||||
endif()
|
||||
if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
|
||||
message(FATAL_ERROR "Failed to find cudnn library file, if cudnn library is not installed, please put \
|
||||
cudnn library file in cuda library path or user library path(eg. /usr/local/cuda/lib64; \
|
||||
/usr/local/lib64; /usr/lib64; /usr/local/lib; /usr/lib), if cudnn library is installed in other \
|
||||
position, please set environment variable CUDNN_HOME to cudnn installation position, there should \
|
||||
be cudnn library file in {CUDNN_HOME}/lib64.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT CUPTI_INCLUDE_DIRS OR CUPTI_INCLUDE_DIRS STREQUAL "")
|
||||
set(CUPTI_INCLUDE_DIRS ${CUDA_PATH}/extras/CUPTI/include)
|
||||
endif()
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:${CUDA_PATH}/lib64)
|
||||
message("CUDA_PATH: ${CUDA_PATH}")
|
||||
message("CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
|
||||
message("CUDNN_INCLUDE_PATH: ${CUDNN_INCLUDE_PATH}")
|
||||
message("CUDNN_LIBRARY_PATH: ${CUDNN_LIBRARY_PATH}")
|
||||
message("CUBLAS_LIBRARY_PATH: ${CUBLAS_LIBRARY_PATH}")
|
||||
message("CUPTI_INCLUDE_DIRS: ${CUPTI_INCLUDE_DIRS}")
|
||||
include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS})
|
||||
|
||||
list(APPEND CUDA_NVCC_FLAGS -arch=sm_53 --expt-relaxed-constexpr)
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
list(APPEND CUDA_NVCC_FLAGS -G)
|
||||
message("CUDA_NVCC_FLAGS" ${CUDA_NVCC_FLAGS})
|
||||
endif()
|
||||
set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
|
||||
add_compile_definitions(ENABLE_GPU)
|
||||
endif()
|
||||
|
||||
if(ENABLE_CPU)
|
||||
|
@ -59,111 +124,6 @@ if(ENABLE_MPI)
|
|||
add_compile_definitions(ENABLE_MPI)
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
find_package(CUDA REQUIRED)
|
||||
find_package(Threads)
|
||||
if(${CUDA_VERSION} VERSION_LESS ${MS_REQUIRE_CUDA_VERSION})
|
||||
message(FATAL_ERROR "The minimum CUDA version ${MS_REQUIRE_CUDA_VERSION} is required, \
|
||||
but only CUDA ${CUDA_VERSION} found.")
|
||||
endif()
|
||||
enable_language(CUDA)
|
||||
if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "")
|
||||
if(DEFINED ENV{CUDA_HOME} AND NOT $ENV{CUDA_HOME} STREQUAL "")
|
||||
set(CUDA_PATH $ENV{CUDA_HOME})
|
||||
else()
|
||||
set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(DEFINED ENV{CUDNN_HOME} AND NOT $ENV{CUDNN_HOME} STREQUAL "")
|
||||
set(CUDNN_INCLUDE_DIR $ENV{CUDNN_HOME}/include)
|
||||
set(CUDNN_LIBRARY_DIR $ENV{CUDNN_HOME}/lib64)
|
||||
find_path(CUDNN_INCLUDE_PATH cudnn.h HINTS ${CUDNN_INCLUDE_DIR} NO_DEFAULT_PATH)
|
||||
find_library(CUDNN_LIBRARY_PATH "cudnn" HINTS ${CUDNN_LIBRARY_DIR} NO_DEFAULT_PATH)
|
||||
if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
|
||||
message(FATAL_ERROR "Failed to find cudnn header file, please set environment variable CUDNN_HOME to \
|
||||
cudnn installation position.")
|
||||
endif()
|
||||
if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
|
||||
message(FATAL_ERROR "Failed to find cudnn library file, please set environment variable CUDNN_HOME to \
|
||||
cudnn installation position.")
|
||||
endif()
|
||||
else()
|
||||
list(APPEND CMAKE_PREFIX_PATH ${CUDA_TOOLKIT_ROOT_DIR})
|
||||
find_path(CUDNN_INCLUDE_PATH cudnn.h PATH_SUFFIXES cuda/inclulde include cuda)
|
||||
find_library(CUDNN_LIBRARY_PATH "cudnn" PATH_SUFFIXES cuda/lib64 lib64 lib cuda/lib lib/x86_64-linux-gnu)
|
||||
if(CUDNN_INCLUDE_PATH STREQUAL CUDNN_INCLUDE_PATH-NOTFOUND)
|
||||
message(FATAL_ERROR "Failed to find cudnn header file, if cudnn library is not installed, please put \
|
||||
cudnn header file in cuda include path or user include path(eg. /usr/local/cuda/include; \
|
||||
/usr/local/include; /usr/include), if cudnn library is installed in other position, please \
|
||||
set environment variable CUDNN_HOME to cudnn installation position, there should be cudnn.h \
|
||||
in {CUDNN_HOME}/include.")
|
||||
endif()
|
||||
if(CUDNN_LIBRARY_PATH STREQUAL CUDNN_LIBRARY_PATH-NOTFOUND)
|
||||
message(FATAL_ERROR "Failed to find cudnn library file, if cudnn library is not installed, please put \
|
||||
cudnn library file in cuda library path or user library path(eg. /usr/local/cuda/lib64; \
|
||||
/usr/local/lib64; /usr/lib64; /usr/local/lib; /usr/lib), if cudnn library is installed in other \
|
||||
position, please set environment variable CUDNN_HOME to cudnn installation position, there should \
|
||||
be cudnn library file in {CUDNN_HOME}/lib64.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT CUPTI_INCLUDE_DIRS OR CUPTI_INCLUDE_DIRS STREQUAL "")
|
||||
set(CUPTI_INCLUDE_DIRS ${CUDA_PATH}/extras/CUPTI/include)
|
||||
endif()
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:${CUDA_PATH}/lib64)
|
||||
message("CUDA_PATH: ${CUDA_PATH}")
|
||||
message("CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}")
|
||||
message("CUDNN_INCLUDE_PATH: ${CUDNN_INCLUDE_PATH}")
|
||||
message("CUDNN_LIBRARY_PATH: ${CUDNN_LIBRARY_PATH}")
|
||||
message("CUPTI_INCLUDE_DIRS: ${CUPTI_INCLUDE_DIRS}")
|
||||
include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS})
|
||||
|
||||
file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"plugin/device/gpu/hal/device/*.cc"
|
||||
"plugin/device/gpu/hal/device/*.cu"
|
||||
"plugin/device/gpu/kernel/*.cu"
|
||||
)
|
||||
|
||||
list(REMOVE_ITEM GPU_SRC_LIST "plugin/device/gpu/kernel/cuda_impl/cuda_ops/*.cu")
|
||||
|
||||
list(APPEND CUDA_NVCC_FLAGS -arch=sm_53 --expt-relaxed-constexpr)
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
list(APPEND CUDA_NVCC_FLAGS -G)
|
||||
message("CUDA_NVCC_FLAGS" ${CUDA_NVCC_FLAGS})
|
||||
endif()
|
||||
list(REMOVE_ITEM GPU_SRC_LIST "plugin/device/gpu/hal/device/mpi/mpi_initializer.cc"
|
||||
"plugin/device/gpu/hal/device/distribution/collective_wrapper.cc"
|
||||
"plugin/device/gpu/hal/device/distribution/mpi_wrapper.cc"
|
||||
"plugin/device/gpu/hal/device/distribution/nccl_wrapper.cc"
|
||||
"plugin/device/gpu/hal/device/trt_loader.cc")
|
||||
|
||||
if(NOT ${TENSORRT_HOME} STREQUAL "")
|
||||
find_path(TENSORRT_HOME_INCLUDE NvInfer.h HINTS ${TENSORRT_HOME}/include)
|
||||
if(TENSORRT_HOME_INCLUDE STREQUAL TENSORRT_HOME_INCLUDE-NOTFOUND)
|
||||
message(FATAL_ERROR "Tensor-RT dir not exist ${TENSORRT_HOME}")
|
||||
endif()
|
||||
message("Enable GPU inference. Tensor-RT include dir: ${TENSORRT_HOME_INCLUDE}")
|
||||
set(ENABLE_GPU_INFER TRUE)
|
||||
add_compile_definitions(ENABLE_GPU_INFER)
|
||||
include_directories(${TENSORRT_HOME_INCLUDE})
|
||||
list(APPEND GPU_SRC_LIST ${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/gpu/hal/device/trt_loader.cc)
|
||||
endif()
|
||||
|
||||
set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
if(${CUDA_VERSION} VERSION_LESS 11.0)
|
||||
string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
else()
|
||||
string(REPLACE "-std=c++17" "-std=c++14" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||
cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
|
||||
set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
|
||||
add_compile_definitions(ENABLE_GPU)
|
||||
add_dependencies(gpu_cuda_lib proto_input)
|
||||
add_subdirectory(plugin/device/gpu/kernel/cuda_impl/cuda_ops)
|
||||
endif()
|
||||
|
||||
## make protobuf files
|
||||
file(GLOB ONNX_PROTO "" ${CMAKE_SOURCE_DIR}/third_party/proto/onnx/onnx.proto)
|
||||
message("onnx proto path is :" ${ONNX_PROTO})
|
||||
|
@ -328,22 +288,11 @@ set(BACKEND_SUB_COMP
|
|||
runtime/hardware
|
||||
runtime/pynative
|
||||
runtime/data_queue
|
||||
plugin/device/ascend/hal/device
|
||||
plugin/device/ascend/hal/hardware
|
||||
plugin/device/ascend/hal/hccl_adapter
|
||||
plugin/device/ascend/hal/profiler
|
||||
plugin/device/ascend/kernel
|
||||
plugin/device/ascend/optimizer
|
||||
plugin/device/cpu/hal/device
|
||||
plugin/device/cpu/hal/hardware
|
||||
plugin/device/cpu/hal/profiler
|
||||
plugin/device/cpu/kernel
|
||||
plugin/device/cpu/optimizer
|
||||
plugin/device/gpu/hal/device
|
||||
plugin/device/gpu/hal/hardware
|
||||
plugin/device/gpu/hal/profiler
|
||||
plugin/device/gpu/kernel
|
||||
plugin/device/gpu/optimizer
|
||||
transform/graph_ir
|
||||
)
|
||||
|
||||
|
@ -370,14 +319,19 @@ endif()
|
|||
|
||||
set_property(SOURCE ${BACKEND_SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
|
||||
add_library(mindspore_backend SHARED ${BACKEND_SUB_OBJECTS_SRC})
|
||||
add_library(mindspore_backend_common STATIC ${BACKEND_SUB_OBJECTS_SRC})
|
||||
|
||||
if(MODE_ASCEND_ACL)
|
||||
add_library(mindspore_backend_static STATIC ${BACKEND_SUB_OBJECTS_SRC})
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
target_link_libraries(mindspore_backend PRIVATE mindspore::pybind11_module)
|
||||
endif()
|
||||
|
||||
target_link_libraries(mindspore_backend PRIVATE mindspore_core mindspore_common proto_input mindspore::protobuf)
|
||||
target_link_libraries(mindspore_backend PRIVATE securec)
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
set_target_properties(mindspore_backend PROPERTIES MACOSX_RPATH ON)
|
||||
set_target_properties(mindspore_backend PROPERTIES INSTALL_RPATH @loader_path)
|
||||
|
@ -388,17 +342,6 @@ endif()
|
|||
if(ENABLE_CPU)
|
||||
target_link_libraries(mindspore_backend PRIVATE mindspore::dnnl mindspore::mkldnn nnacl)
|
||||
endif()
|
||||
if(ENABLE_GPU)
|
||||
message("add gpu lib to mindspore_backend")
|
||||
target_link_libraries(mindspore_backend PRIVATE gpu_cuda_lib cublas cuda_ops
|
||||
${CUDA_PATH}/lib64/libcurand.so
|
||||
${CUDNN_LIBRARY_PATH}
|
||||
${CUDA_PATH}/lib64/libcudart.so
|
||||
${CUDA_PATH}/lib64/stubs/libcuda.so
|
||||
${CUDA_PATH}/lib64/libcusolver.so
|
||||
${CUDA_PATH}/lib64/libcufft.so
|
||||
${CUDA_PATH}/lib64/libcusparse.so)
|
||||
endif()
|
||||
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(mindspore_backend PRIVATE mindspore::ssl mindspore::crypto)
|
||||
|
@ -421,28 +364,7 @@ elseif(ENABLE_CPU AND NOT WIN32)
|
|||
-Wl,--no-as-needed mindspore::event_core ps_cache)
|
||||
endif()
|
||||
|
||||
if(ENABLE_D)
|
||||
find_library(GE_RUNNER ge_runner ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(GRAPH graph ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(HCCL hccl ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
target_link_libraries(mindspore_backend PUBLIC ${GE_RUNNER} ${GRAPH} ${HCCL})
|
||||
endif()
|
||||
|
||||
if(MODE_ASCEND_ALL)
|
||||
MESSAGE("USE DAV LIB PATH: ${ASCEND_PATH}")
|
||||
find_library(ERROR_MANAGER error_manager ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(RUNTIME_LIB runtime ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(DATATRANSFER datatransfer HINTS ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(PROFILING msprofiler ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(ACL ascendcl ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(ACL_TDT_CHANNEL acl_tdt_channel ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(PLATFORM platform ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(OPT_FEATURE opt_feature ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(adump_server libadump_server.a ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(OPTILING optiling ${ASCEND_CANN_OPP_PATH} ${ASCEND_TOOLKIT_OPP_PATH})
|
||||
target_link_libraries(mindspore_backend PUBLIC ${RUNTIME_LIB} ${TSDCLIENT} ${DATATRANSFER} ${ERROR_MANAGER}
|
||||
-Wl,--no-as-needed ${OPTILING} ${PLATFORM} ${ACL} ${ACL_TDT_CHANNEL} ${OPT_FEATURE} ${PROFILING})
|
||||
target_link_libraries(mindspore PUBLIC -Wl,--start-group proto_input mindspore::protobuf -Wl,--end-group)
|
||||
elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
target_link_libraries(mindspore PUBLIC -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece
|
||||
|
@ -473,6 +395,18 @@ else()
|
|||
endif()
|
||||
set(MINDSPORE_RPATH ${ORIGIN_PATH}/lib:${MINDSPORE_RPATH})
|
||||
|
||||
if(ENABLE_D)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/ascend)
|
||||
add_subdirectory(plugin/device/ascend)
|
||||
target_link_libraries(mindspore_backend PRIVATE mindspore_ascend)
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/plugin/device/gpu)
|
||||
add_subdirectory(plugin/device/gpu)
|
||||
target_link_libraries(mindspore_backend PRIVATE mindspore_gpu)
|
||||
endif()
|
||||
|
||||
set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
|
@ -526,16 +460,6 @@ if(ENABLE_MINDDATA)
|
|||
add_subdirectory(minddata/dataset)
|
||||
endif()
|
||||
|
||||
if(MODE_ASCEND_ALL)
|
||||
target_link_libraries(_c_expression PRIVATE ${adump_server})
|
||||
endif()
|
||||
|
||||
if(ENABLE_D)
|
||||
if(ENABLE_MPI)
|
||||
set_target_properties(_ascend_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ENABLE_TEST OR ENABLE_TESTCASES)
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
list(APPEND STUB_COMMON_SOURCE ${CMAKE_SOURCE_DIR}/tests/ut/cpp/stub/ge/ge_operator_stub.cc)
|
||||
|
|
|
@ -734,10 +734,10 @@ AbstractBasePtrList RectifyAbstractFromRegAttr(const PrimitivePtr &primitive,
|
|||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
auto device = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
|
||||
if (device == kGPUDevice) {
|
||||
if (DynamicShapeConstInputToAttrGPU.find(primitive->name()) != DynamicShapeConstInputToAttrGPU.end()) {
|
||||
if (IsOneOfDynamicShapeConstInputToAttrGPU(primitive->name())) {
|
||||
return input_abstract;
|
||||
}
|
||||
} else if (DynamicShapeConstInputToAttr.find(primitive->name()) != DynamicShapeConstInputToAttr.end()) {
|
||||
} else if (IsOneOfDynamicShapeConstInputToAttr(primitive->name())) {
|
||||
return input_abstract;
|
||||
}
|
||||
auto convert_input_list = reg.GetConstInputAttrInfo();
|
||||
|
|
|
@ -133,7 +133,7 @@ const AnfNodePtr CustomOpRegInfoToAttr::Process(const FuncGraphPtr &, const AnfN
|
|||
MS_EXCEPTION_IF_NULL(primitive);
|
||||
auto func_type = common::AnfAlgo::GetNodeAttr<std::string>(cnode, kAttrFuncType);
|
||||
// AKG/AICPU need to process attr, TBE will process later in the json creating phase.
|
||||
if (kCustomTypeAkg.find(func_type) == kCustomTypeAkg.end() || func_type == kCustomTypeAICPU) {
|
||||
if (!IsOneOfCustomAkgType(func_type) || func_type == kCustomTypeAICPU) {
|
||||
return nullptr;
|
||||
}
|
||||
// Early return if current node does not have attr
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "include/common/utils/anfalgo.h"
|
||||
#include "backend/common/session/kernel_graph.h"
|
||||
#include "runtime/hardware/device_type.h"
|
||||
#include "include/backend/visible.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace somas {
|
||||
|
@ -248,7 +249,7 @@ using SomasPtr = std::shared_ptr<Somas>;
|
|||
using SomasCreator = std::function<std::shared_ptr<Somas>()>;
|
||||
|
||||
// @todo will delete when old runtime remove
|
||||
class SomasManager {
|
||||
class BACKEND_EXPORT SomasManager {
|
||||
public:
|
||||
static SomasManager &Instance() {
|
||||
static SomasManager instance{};
|
||||
|
|
|
@ -399,7 +399,7 @@ void ClearInputDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *d
|
|||
}
|
||||
|
||||
bool OpInBlackList(const session::BackendOpRunInfoPtr &op_run_info) {
|
||||
return kOpCacheBlackList.find(op_run_info->base_op_run_info.op_name) != kOpCacheBlackList.end();
|
||||
return IsOneOfCacheBlackList(op_run_info->base_op_run_info.op_name);
|
||||
}
|
||||
|
||||
int GetExecutionMode() {
|
||||
|
|
|
@ -28,9 +28,6 @@
|
|||
#include "utils/ms_context.h"
|
||||
#include "ps/ps_context.h"
|
||||
#include "utils/anf_utils.h"
|
||||
#ifdef ENABLE_D
|
||||
#include "include/transform/graph_ir/utils.h"
|
||||
#endif
|
||||
namespace mindspore {
|
||||
const char kMsConvert[] = "ms";
|
||||
const char kMsVm[] = "vm";
|
||||
|
|
|
@ -194,12 +194,17 @@ void CallbackImpl::ResetKernelInfo(const AnfNodePtr &node) {
|
|||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (GetTargetFromContext() == kAscendDevice) {
|
||||
cnode->set_kernel_info(std::make_shared<device::KernelInfo>());
|
||||
auto kernel_info_setter = GraphKernelInfoManager::Instance().GetGraphKernelInfo(kAscendDevice);
|
||||
MS_EXCEPTION_IF_NULL(kernel_info_setter);
|
||||
kernel_info_setter->SetKernelInfo(cnode, KernelType::UNKNOWN_KERNEL_TYPE);
|
||||
} else if (GetTargetFromContext() == kGPUDevice) {
|
||||
cnode->set_kernel_info(std::make_shared<device::KernelInfo>());
|
||||
auto kernel_info_setter = GraphKernelInfoManager::Instance().GetGraphKernelInfo(kGPUDevice);
|
||||
MS_EXCEPTION_IF_NULL(kernel_info_setter);
|
||||
kernel_info_setter->SetKernelInfo(cnode, KernelType::UNKNOWN_KERNEL_TYPE);
|
||||
} else {
|
||||
cnode->set_kernel_info(std::make_shared<device::KernelInfo>());
|
||||
auto kernel_info_setter = GraphKernelInfoManager::Instance().GetGraphKernelInfo(kCPUDevice);
|
||||
if (kernel_info_setter != nullptr) {
|
||||
kernel_info_setter->SetKernelInfo(cnode, KernelType::UNKNOWN_KERNEL_TYPE);
|
||||
|
|
|
@ -21,10 +21,11 @@
|
|||
|
||||
#include "utils/hash_map.h"
|
||||
#include "common/graph_kernel/model/op_node.h"
|
||||
#include "include/backend/visible.h"
|
||||
|
||||
namespace mindspore::graphkernel::inner {
|
||||
using CreatorFunc = std::function<PrimOpPtr(const std::string &)>;
|
||||
class OpRegistry {
|
||||
class BACKEND_EXPORT OpRegistry {
|
||||
public:
|
||||
static OpRegistry &Instance() {
|
||||
static OpRegistry instance{};
|
||||
|
|
|
@ -62,7 +62,13 @@ if(ENABLE_D)
|
|||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/array.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/map.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/arrangement.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc")
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/frontend/parallel/tensor_layout/shape_util.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/pattern_engine.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/helper.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/node_pass.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/backend/common/optimizer/visit.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/kernel/kernel_build_info.cc"
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/device/kernel_info.cc")
|
||||
endif()
|
||||
|
||||
if(NOT ENABLE_TESTCASES AND NOT BUILD_LITE)
|
||||
|
@ -70,11 +76,22 @@ if(NOT ENABLE_TESTCASES AND NOT BUILD_LITE)
|
|||
set(MSLIB_SRC ${MSLIB_SRC} ${CMAKE_SOURCE_DIR}/mindspore/core/utils/status.cc)
|
||||
endif()
|
||||
|
||||
if(ENABLE_D OR ENABLE_ACL)
|
||||
list(APPEND MSLIB_SRC
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc")
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
list(APPEND MSLIB_SRC "${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/gpu/hal/device/cuda_driver.cc")
|
||||
endif()
|
||||
|
||||
if(BUILD_LITE)
|
||||
list(APPEND MSLIB_SRC "${CMAKE_CURRENT_SOURCE_DIR}/../../../mindspore/ccsrc/utils/config_manager.cc")
|
||||
file(GLOB_RECURSE ACL_REMOVE_SRC ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"model/acl/acl_vm/*.cc"
|
||||
)
|
||||
list(REMOVE_ITEM MSLIB_SRC
|
||||
"${CMAKE_SOURCE_DIR}/mindspore/ccsrc/plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.cc")
|
||||
list(REMOVE_ITEM MSLIB_SRC "${CMAKE_CURRENT_SOURCE_DIR}/akg_kernel_register.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/model/acl/acl_model_multi.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/model/acl/acl_model.cc"
|
||||
|
@ -157,7 +174,8 @@ if(ENABLE_D)
|
|||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
target_link_libraries(mindspore_shared_lib PRIVATE gpu_cuda_lib cublas cuda_ops
|
||||
target_link_libraries(mindspore_shared_lib PRIVATE cuda_ops
|
||||
${CUBLAS_LIBRARY_PATH}
|
||||
${CUDA_PATH}/lib64/libcurand.so
|
||||
${CUDNN_LIBRARY_PATH}
|
||||
${CUDA_PATH}/lib64/libcudart.so
|
||||
|
|
|
@ -15,9 +15,6 @@ set(_OFFLINE_SRC_LIST
|
|||
|
||||
if(ENABLE_DUMP_IR)
|
||||
file(GLOB_RECURSE _RDR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "rdr/*.cc")
|
||||
if(NOT ENABLE_D)
|
||||
list(REMOVE_ITEM _RDR_SRC_LIST "rdr/task_debug_info_recorder.cc")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if("${ENABLE_HIDDEN}" STREQUAL "OFF")
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
#include "frontend/optimizer/optimizer_caller.h"
|
||||
#include "frontend/optimizer/anf_visitor.h"
|
||||
|
@ -40,6 +41,8 @@ namespace irpass {
|
|||
// {prim::kPrimListGetItem, L, N}
|
||||
// {prim::kPrimTupleSetItem, T, N, Z}
|
||||
// {prim::kPrimListSetItem, L, N, Z}
|
||||
const std::map<std::string, size_t> kSliceAttrToIndex = {{kSliceStart, 1}, {kSliceStop, 2}, {kSliceStep, 3}};
|
||||
|
||||
class TupleListConvertItemIndexToPositive : public AnfVisitor {
|
||||
public:
|
||||
AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override {
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
#include "utils/log_adapter.h"
|
||||
#include "ir/dtype/type.h"
|
||||
#include "include/common/visible.h"
|
||||
|
||||
namespace mindspore {
|
||||
// op name. Op which not exists in operator/ops.h, so define it's name here
|
||||
|
@ -661,7 +662,6 @@ constexpr auto kCustomTypePyfunc = "pyfunc";
|
|||
constexpr auto kCustomTypeTbe = "tbe";
|
||||
constexpr auto kCustomTypeAICPU = "aicpu";
|
||||
constexpr auto kCustomTypeHybrid = "hybrid";
|
||||
const std::set<std::string> kCustomTypeAkg = {"ir_builder", "tvm_compute", "hybrid"};
|
||||
|
||||
// primal attr key name
|
||||
constexpr auto kPrimalAttrForwardNodeName = "forward_node_name";
|
||||
|
@ -791,145 +791,23 @@ constexpr auto kOpFormat_FRACTAL_Z_3D = "FRACTAL_Z_3D";
|
|||
constexpr auto kOpFormat_FRACTAL_ZN_LSTM = "FRACTAL_ZN_LSTM";
|
||||
constexpr auto kOpFormat_FRACTAL_ZN_RNN = "FRACTAL_ZN_RNN";
|
||||
constexpr auto kOpFormat_ND_RNN_BIAS = "ND_RNN_BIAS";
|
||||
|
||||
const std::set<std::string> kOpFormatList = {kOpFormat_DEFAULT,
|
||||
kOpFormat_NC1KHKWHWC0,
|
||||
kOpFormat_ND,
|
||||
kOpFormat_NCHW,
|
||||
kOpFormat_NHWC,
|
||||
kOpFormat_HWCN,
|
||||
kOpFormat_NC1HWC0,
|
||||
kOpFormat_FRAC_Z,
|
||||
kOpFormat_C1HWNCoC0,
|
||||
kOpFormat_FRAC_NZ,
|
||||
kOpFormat_NC1HWC0_C04,
|
||||
kOpFormat_FRACTAL_Z_C04,
|
||||
kOpFormat_NDHWC,
|
||||
kOpFormat_FRACTAL_ZN_LSTM,
|
||||
kOpFormat_FRACTAL_ZN_RNN,
|
||||
kOpFormat_ND_RNN_BIAS,
|
||||
kOpFormat_NDC1HWC0,
|
||||
kOpFormat_NCDHW,
|
||||
kOpFormat_FRACTAL_Z_3D,
|
||||
kOpFormat_DHWNC,
|
||||
kOpFormat_DHWCN};
|
||||
|
||||
constexpr auto kSliceStart = "start";
|
||||
constexpr auto kSliceStop = "stop";
|
||||
constexpr auto kSliceStep = "step";
|
||||
const std::map<std::string, size_t> kSliceAttrToIndex = {{kSliceStart, 1}, {kSliceStop, 2}, {kSliceStep, 3}};
|
||||
|
||||
const std::set<std::string> kDefaultCompatibleFormat = {kOpFormat_ND, kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_HWCN,
|
||||
kOpFormat_NCDHW};
|
||||
|
||||
const std::set<std::string> kOptOperatorSet = {kMomentumOpName,
|
||||
kApplyMomentumOpName,
|
||||
kApplyAdadeltaOpName,
|
||||
kApplyAdagradOpName,
|
||||
kApplyAdagradDAName,
|
||||
kApplyAdamOpName,
|
||||
kApplyAdaMaxOpName,
|
||||
kApplyAddSignOpName,
|
||||
kApplyCenteredRMSPOpName,
|
||||
kApplyFtrlOpName,
|
||||
kApplyFtrlV2OpName,
|
||||
kApplyGradientDescentOpName,
|
||||
kApplyPowerSignOpName,
|
||||
kApplyProximalAdagradOpName,
|
||||
kApplyProximalGradientDescentOpName,
|
||||
kApplyRMSPropOpName,
|
||||
kAdamApplyOneWithDecayOpName,
|
||||
kAdamApplyOneWithDecayAssignOpName,
|
||||
kFusedAdamWeightDecayName,
|
||||
kAdamWeightDecayName,
|
||||
kFusedCastAdamWeightDecayName,
|
||||
kFusedAdamName,
|
||||
kFusedAdaFactorName,
|
||||
kFusedAdaFactorWithGlobalNormName,
|
||||
kFusedSparseAdamName,
|
||||
kFusedMulApplyMomentumOpName,
|
||||
kFusedWeightScaleApplyMomentum,
|
||||
kFusedScaleApplyMomentum,
|
||||
kApplyCenteredRMSPropOpName,
|
||||
kFusedSparseFtrlName,
|
||||
kFusedSparseProximalAdagradName,
|
||||
kFusedSparseLazyAdamName,
|
||||
kSparseApplyFtrlName,
|
||||
kSparseApplyFtrlV2Name,
|
||||
kSGDName,
|
||||
kLARSUpdateName,
|
||||
kCombineMomentumWeightOpName,
|
||||
kCombineMomentumOpName,
|
||||
kScatterAddOpName,
|
||||
kScatterUpdateOpName,
|
||||
kSparseApplyProximalAdagradOpName};
|
||||
|
||||
const std::set<std::string> kNodeWithSeedOperators = {kGammaOpName, kPoissonOpName, kStandardLaplaceOpName,
|
||||
kStandardNormalOpName, kUniformIntOpName, kUniformRealOpName};
|
||||
const std::set<std::string> kPosteriorOperatorSet = {kPullOpName};
|
||||
|
||||
const std::set<std::string> kOpCacheBlackList = {kUniformCandidateSamplerOpName, kInitDatasetQueueOpName,
|
||||
kGetNextOpName};
|
||||
|
||||
const std::set<std::string> kOpNotSupportMultiThreadExecList = {kAvgPoolOpName, kAvgPoolGradOpName, kMaxPoolOpName,
|
||||
kBatchNorm, kBatchNormGradOpName};
|
||||
|
||||
const std::set<std::string> kHWSpecialFormatSet = {
|
||||
kOpFormat_FRACTAL_Z_3D, kOpFormat_NC1KHKWHWC0, kOpFormat_NC1HWC0, kOpFormat_FRAC_NZ,
|
||||
kOpFormat_C1HWNCoC0, kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04, kOpFormat_FRACTAL_ZN_LSTM,
|
||||
kOpFormat_FRACTAL_ZN_RNN, kOpFormat_NDC1HWC0, kOpFormat_FRAC_Z};
|
||||
|
||||
const std::set<TypeId> kFloatDataTypeSet = {kNumberTypeFloat16, kNumberTypeFloat32};
|
||||
|
||||
const std::set<std::string> kComputeDepend = {kUniqueOpName,
|
||||
kUniqueConsecutiveOpName,
|
||||
kComputeAccidentalHitsOpName,
|
||||
kSubAndFilterOpName,
|
||||
kPadAndShiftOpName,
|
||||
kCTCGreedyDecoderOpName,
|
||||
kMaskedSelectOpName,
|
||||
kDynamicStitchOpName,
|
||||
kGetNextOpName,
|
||||
kListDiffOpName,
|
||||
kNonMaxSuppressionV3OpName,
|
||||
kNonMaxSuppressionWithOverlapsOpName,
|
||||
kCoalesceOpName,
|
||||
kTruncatedNormal,
|
||||
kNonDeterministicInts,
|
||||
kFractionalAvgPoolGradOpName,
|
||||
kDenseToDenseSetOperation,
|
||||
kSegmentMaxOpName,
|
||||
kCSRSparseMatrixToSparseTensorOpName,
|
||||
kSegmentMinOpName,
|
||||
kLuUnpackOpName,
|
||||
kSegmentSumOpName,
|
||||
kResizeBicubicOpName,
|
||||
kResizeAreaOpName,
|
||||
kSegmentMeanOpName,
|
||||
kSegmentProdOpName,
|
||||
kNonZeroOpName,
|
||||
kSparseSparseMinimumOpName,
|
||||
kRpcRecvOpName,
|
||||
kAdaptiveMaxPool3DGradOpName};
|
||||
|
||||
const std::set<std::string> k3DFormatSet = {kOpFormat_NCDHW, kOpFormat_NDC1HWC0, kOpFormat_FRACTAL_Z_3D,
|
||||
kOpFormat_NDHWC, kOpFormat_DHWCN, kOpFormat_DHWNC};
|
||||
|
||||
const std::set<std::string> kNoPaddingFormatSet = {kOpFormat_ChannelLast, kOpFormat_FRAC_NZ, kOpFormat_FRACTAL_ZN_RNN,
|
||||
kOpFormat_ND_RNN_BIAS};
|
||||
|
||||
const std::set<std::string> DynamicShapeConstInputToAttr = {
|
||||
kCastOpName, kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName,
|
||||
kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName, kTransposeOpName};
|
||||
|
||||
const std::set<std::string> DynamicShapeConstInputToAttrCPU = {
|
||||
kCastOpName, kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName,
|
||||
kReduceAllOpName, kReduceAnyOpName, kConcatOpName, kReduceSumOpName, kTransposeOpName};
|
||||
|
||||
const std::set<std::string> DynamicShapeConstInputToAttrGPU = {
|
||||
kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName,
|
||||
kReduceSumOpName, kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName,
|
||||
kReduceAnyOpName, kConcatOpName, kScatterNdOpName, kGatherV2OpName, kAvgPool3DGradOpName};
|
||||
COMMON_EXPORT bool IsOneOfCustomAkgType(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOfOperator(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOfPosteriorOperator(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOfCacheBlackList(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOfNotSupportMultiThreadExec(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOf3DFormat(const std::string &format);
|
||||
COMMON_EXPORT bool IsOneOfNoPaddingFormat(const std::string &format);
|
||||
COMMON_EXPORT bool IsOneOfDynamicShapeConstInputToAttr(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOfDynamicShapeConstInputToAttrCPU(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOfDynamicShapeConstInputToAttrGPU(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOfComputeDepend(const std::string &name);
|
||||
COMMON_EXPORT bool IsOneOfHWSpecialFormat(const std::string &format);
|
||||
COMMON_EXPORT bool IsOneOfFormat(const std::string &format);
|
||||
|
||||
// The map between kernel's output and input ref relationship.
|
||||
// Key is the output index while the value is input index which will be used as the reference of output.
|
||||
|
|
|
@ -21,12 +21,13 @@
|
|||
#include <utility>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "include/backend/visible.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
using AkgKernelBuildCreator = std::function<std::shared_ptr<AkgKernelBuilder>()>;
|
||||
|
||||
class AkgKernelBuildManager {
|
||||
class BACKEND_EXPORT AkgKernelBuildManager {
|
||||
public:
|
||||
static AkgKernelBuildManager &Instance();
|
||||
void Register(const std::string &device_type, AkgKernelBuildCreator &&creator);
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "ir/dtype.h"
|
||||
#include "ir/kernel_info_dev.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "include/backend/visible.h"
|
||||
namespace mindspore {
|
||||
class GraphKernelInfo {
|
||||
public:
|
||||
|
@ -35,7 +36,7 @@ class GraphKernelInfo {
|
|||
|
||||
using GraphKernelInfoCreator = std::function<std::shared_ptr<GraphKernelInfo>()>;
|
||||
|
||||
class GraphKernelInfoManager {
|
||||
class BACKEND_EXPORT GraphKernelInfoManager {
|
||||
public:
|
||||
static GraphKernelInfoManager &Instance() {
|
||||
static GraphKernelInfoManager instance{};
|
||||
|
@ -52,6 +53,7 @@ class GraphKernelInfoManager {
|
|||
MS_EXCEPTION_IF_NULL(iter->second);
|
||||
return (iter->second)();
|
||||
}
|
||||
MS_LOG(WARNING) << "Can not get a graph kernel info ptr on device: " << device_type;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -67,7 +67,6 @@ constexpr auto kFormat = "format";
|
|||
constexpr auto kNeedCompile = "need_compile";
|
||||
constexpr auto kShape = "shape";
|
||||
constexpr auto kProcessor = "processor";
|
||||
std::multimap<std::string, std::shared_ptr<OpInfo>> OpLib::op_info_;
|
||||
|
||||
static std::string ImplTypeToStr(OpImplyType impl_type) {
|
||||
switch (impl_type) {
|
||||
|
|
|
@ -27,16 +27,16 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class OpLib {
|
||||
class BACKEND_EXPORT OpLib {
|
||||
public:
|
||||
OpLib() = default;
|
||||
virtual ~OpLib() = default;
|
||||
BACKEND_EXPORT static bool RegOp(const std::string &json_string, const std::string &impl_path);
|
||||
static bool RegOp(const std::string &json_string, const std::string &impl_path);
|
||||
static std::shared_ptr<OpInfo> FindOp(const std::string &op_name, OpImplyType imply_type,
|
||||
bool is_dynamic_shape = false);
|
||||
|
||||
protected:
|
||||
static std::multimap<std::string, std::shared_ptr<OpInfo>> op_info_;
|
||||
inline static std::multimap<std::string, std::shared_ptr<OpInfo>> op_info_ = {};
|
||||
|
||||
private:
|
||||
static bool RegOpFromLocalInfo();
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
#ifdef ENABLE_GPU_COLLECTIVE
|
||||
#include "plugin/device/gpu/hal/device/distribution/collective_init.h"
|
||||
#else
|
||||
#include "plugin/device/gpu/hal/device/distribution/collective_fake_init.h"
|
||||
#include "runtime/collective/collective_fake_init.h"
|
||||
#endif
|
||||
#if ((defined ENABLE_CPU) && (!defined _WIN32))
|
||||
#include "ps/util.h"
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
|
||||
if(ENABLE_D OR ENABLE_ACL)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
set(MINDSPORE_RPATH
|
||||
${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
endif()
|
||||
|
||||
########### mindspore_ascend.so #####
|
||||
set(ASCEND_SUB_COMP
|
||||
hal/device
|
||||
hal/hardware
|
||||
hal/hccl_adapter
|
||||
hal/profiler
|
||||
kernel
|
||||
optimizer
|
||||
)
|
||||
|
||||
foreach(a_comp ${ASCEND_SUB_COMP})
|
||||
add_subdirectory(${a_comp})
|
||||
string(REPLACE "/" "_" sub ${a_comp})
|
||||
if(TARGET _mindspore_plugin_device_ascend_${sub}_obj)
|
||||
list(APPEND ASCEND_SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_plugin_device_ascend_${sub}_obj>)
|
||||
add_dependencies(_mindspore_plugin_device_ascend_${sub}_obj proto_input)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
add_library(mindspore_ascend SHARED ${ASCEND_SUB_OBJECTS_SRC})
|
||||
target_link_libraries(mindspore_ascend PUBLIC mindspore_backend_common)
|
||||
target_link_libraries(mindspore_ascend PRIVATE mindspore_core mindspore_common proto_input mindspore::protobuf)
|
||||
target_link_libraries(mindspore_ascend PRIVATE securec)
|
||||
|
||||
set_target_properties(mindspore_ascend PROPERTIES INSTALL_RPATH $ORIGIN)
|
||||
target_link_libraries(mindspore_ascend PRIVATE mindspore::dnnl mindspore::mkldnn mindspore::ssl
|
||||
mindspore::crypto nnacl)
|
||||
|
||||
if(ENABLE_DEBUGGER)
|
||||
# debugger: link grpc
|
||||
if(ENABLE_D)
|
||||
target_link_libraries(mindspore_ascend PRIVATE -Wl,--no-as-needed mindspore::grpc++)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ENABLE_D)
|
||||
find_library(GE_RUNNER ge_runner ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(GRAPH graph ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(HCCL hccl ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
target_link_libraries(mindspore_ascend PUBLIC ${GE_RUNNER} ${GRAPH} ${HCCL})
|
||||
target_link_libraries(mindspore_ascend PRIVATE mindspore::event mindspore::event_pthreads
|
||||
mindspore::event_openssl -Wl,--no-as-needed mindspore::event_core ps_cache)
|
||||
endif()
|
||||
|
||||
if(MODE_ASCEND_ALL)
|
||||
MESSAGE("USE DAV LIB PATH: ${ASCEND_PATH}")
|
||||
find_library(ERROR_MANAGER error_manager ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(RUNTIME_LIB runtime ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(DATATRANSFER datatransfer HINTS ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(PROFILING msprofiler ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(ACL ascendcl ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(ACL_TDT_CHANNEL acl_tdt_channel ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(PLATFORM platform ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(OPT_FEATURE opt_feature ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(adump_server libadump_server.a ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(OPTILING optiling ${ASCEND_CANN_OPP_PATH} ${ASCEND_TOOLKIT_OPP_PATH})
|
||||
|
||||
target_link_libraries(mindspore_ascend PUBLIC ${RUNTIME_LIB} ${TSDCLIENT} ${DATATRANSFER} ${ERROR_MANAGER}
|
||||
-Wl,--no-as-needed ${OPTILING} ${PLATFORM} ${ACL} ${ACL_TDT_CHANNEL} ${OPT_FEATURE} ${PROFILING})
|
||||
target_link_libraries(mindspore_ascend PRIVATE ${adump_server})
|
||||
endif()
|
||||
|
||||
if(ENABLE_D)
|
||||
if(ENABLE_MPI)
|
||||
set_target_properties(_ascend_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kernel/aicpu/aicpu_ops)
|
||||
add_subdirectory(kernel/aicpu/aicpu_ops)
|
|
@ -24,10 +24,11 @@
|
|||
#include "runtime/hardware/device_context_manager.h"
|
||||
#include "runtime/data_queue/data_queue.h"
|
||||
#include "runtime/rt.h"
|
||||
#include "include/backend/visible.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
class AscendDataQueueDynamic : public DataQueue {
|
||||
class BACKEND_EXPORT AscendDataQueueDynamic : public DataQueue {
|
||||
public:
|
||||
explicit AscendDataQueueDynamic(const size_t capacity);
|
||||
virtual ~AscendDataQueueDynamic() = default;
|
||||
|
|
|
@ -83,8 +83,7 @@ string GetPriorityMatchFormat(const CNodePtr &cnode) {
|
|||
size_t input_num = common::AnfAlgo::GetInputTensorNum(cnode);
|
||||
for (size_t index = 0; index < input_num; ++index) {
|
||||
auto pre_output_format = AnfAlgo::GetPrevNodeOutputFormat(cnode, index);
|
||||
if (AnfAlgo::IsFeatureMapInput(cnode, index) &&
|
||||
kHWSpecialFormatSet.find(pre_output_format) != kHWSpecialFormatSet.end()) {
|
||||
if (AnfAlgo::IsFeatureMapInput(cnode, index) && IsOneOfHWSpecialFormat(pre_output_format)) {
|
||||
priority_matched_format = !is_init ? pre_output_format : priority_matched_format;
|
||||
is_init = true;
|
||||
}
|
||||
|
@ -494,7 +493,7 @@ KernelSelectStatus SelectCustomKernelInfo(const CNodePtr &kernel_node, KernelTyp
|
|||
auto func_type = common::AnfAlgo::GetNodeAttr<std::string>(kernel_node, kAttrFuncType);
|
||||
if (func_type == kCustomTypeTbe) {
|
||||
*kernel_type = KernelType::TBE_KERNEL;
|
||||
} else if (kCustomTypeAkg.find(func_type) != kCustomTypeAkg.end()) {
|
||||
} else if (IsOneOfCustomAkgType(func_type)) {
|
||||
*kernel_type = KernelType::AKG_KERNEL;
|
||||
} else if (func_type == kCustomTypeAICPU) {
|
||||
*kernel_type = KernelType::AICPU_KERNEL;
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "runtime/base.h"
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "plugin/device/ascend/hal/device/profiling/profiling_utils.h"
|
||||
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
||||
|
||||
using mindspore::device::ascend::ProfilingUtils;
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "plugin/device/ascend/kernel/ascend_kernel_mod.h"
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "backend/common/session/kernel_graph.h"
|
||||
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ps/ps_cache/ascend/ascend_ps_cache.h"
|
||||
#include "plugin/device/ascend/hal/device/ps/ascend_ps_cache.h"
|
||||
#include <google/protobuf/text_format.h>
|
||||
#include <string>
|
||||
#include <vector>
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_ASCEND_ASCEND_PS_CACHE_H_
|
||||
#define MINDSPORE_CCSRC_PS_PS_CACHE_ASCEND_ASCEND_PS_CACHE_H_
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
@ -72,4 +72,4 @@ class AscendPsCache : public PsCacheBasic {
|
|||
} // namespace ascend
|
||||
} // namespace ps
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PS_PS_CACHE_ASCEND_ASCEND_PS_CACHE_H_
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_PS_ASCEND_PS_CACHE_H_
|
|
@ -13,7 +13,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "debug/rdr/task_debug_info_recorder.h"
|
||||
#include "plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.h"
|
||||
#include <utility>
|
||||
#include "plugin/device/ascend/hal/device/tasksink/task_generator.h"
|
||||
#include "include/common/debug/rdr/recorder_manager.h"
|
|
@ -13,8 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_TASKSINK_TASK_DEBUG_INFO_RECORDER_H_
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_TASKSINK_TASK_DEBUG_INFO_RECORDER_H_
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
@ -49,4 +49,4 @@ bool RecordTaskDebugInfo(SubModuleId module, const std::string &name,
|
|||
const std::vector<TaskDebugInfoPtr> &task_debug_info_list);
|
||||
} // namespace RDR
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_RDR_TASK_DEBUG_INFO_RECORDER_H_
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_DEVICE_TASKSINK_TASK_DEBUG_INFO_RECORDER_H_
|
|
@ -28,7 +28,7 @@
|
|||
#include "plugin/device/ascend/hal/device/profiling/profiling_manager.h"
|
||||
#endif
|
||||
#ifdef ENABLE_DUMP_IR
|
||||
#include "debug/rdr/task_debug_info_recorder.h"
|
||||
#include "plugin/device/ascend/hal/device/tasksink/task_debug_info_recorder.h"
|
||||
#endif
|
||||
#include "mindspore/core/utils/file_utils.h"
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_RUNTIME_HARDWARE_ASCEND_ASCEND_UTILS_H_
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "plugin/device/ascend/hal/hardware/ascend_device_context.h"
|
||||
#include "backend/common/session/kernel_graph.h"
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include "utils/ms_context.h"
|
||||
#include "include/backend/visible.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace profiler {
|
||||
|
@ -107,8 +108,8 @@ class MemoryProfiling {
|
|||
return instance;
|
||||
}
|
||||
|
||||
std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id);
|
||||
std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id) const;
|
||||
BACKEND_EXPORT std::shared_ptr<GraphMemory> AddGraphMemoryNode(uint32_t graph_id);
|
||||
BACKEND_EXPORT std::shared_ptr<GraphMemory> GetGraphMemoryNode(uint32_t graph_id) const;
|
||||
void SetDeviceMemSize(uint64_t size) { device_mem_size_ = size; }
|
||||
bool MemoryToPB();
|
||||
void SaveMemoryProfiling();
|
||||
|
|
|
@ -439,7 +439,7 @@ void CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
|
|||
|
||||
UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
|
||||
auto op_name = common::AnfAlgo::GetCNodeName(anf_node);
|
||||
if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
|
||||
if (IsOneOfComputeDepend(op_name)) {
|
||||
shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
|
||||
}
|
||||
ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset, shape_type);
|
||||
|
|
|
@ -39,7 +39,7 @@ DynamicAicpuOpKernelMod::DynamicAicpuOpKernelMod(const AnfNodePtr &anf_node_ptr)
|
|||
auto cnode = anf_node_ptr->cast<CNodePtr>();
|
||||
if (cnode != nullptr) {
|
||||
auto op_name = common::AnfAlgo::GetCNodeName(cnode);
|
||||
if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
|
||||
if (IsOneOfComputeDepend(op_name)) {
|
||||
unknow_type_ = device::ascend::UnknowShapeOpType::DEPEND_COMPUTE;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@ bool AscendKernelMod::IsNeedRetrieveOutputShape() {
|
|||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
||||
auto op_name = common::AnfAlgo::GetCNodeName(cnode);
|
||||
if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
|
||||
if (IsOneOfComputeDepend(op_name)) {
|
||||
is_need_retrieve_output_shape_ = true;
|
||||
}
|
||||
return is_need_retrieve_output_shape_;
|
||||
|
|
|
@ -166,8 +166,7 @@ void SingleTbeJsonCreator::GenInputDescJson(const AnfNodePtr &anf_node, size_t r
|
|||
auto def_format = TbeJsonUtils::IsNeedChangeDefaultFormat(anf_node) ? kOpFormat_NCDHW : kOpFormat_NCHW;
|
||||
auto format = AnfAlgo::GetInputFormat(anf_node, real_input_index);
|
||||
format = TbeAdapter::FormatPass(format, ori_shape.size());
|
||||
format =
|
||||
(def_format == kOpFormat_NCDHW && k3DFormatSet.find(format) == k3DFormatSet.end()) ? kOpFormat_NCDHW : format;
|
||||
format = (def_format == kOpFormat_NCDHW && !IsOneOf3DFormat(format)) ? kOpFormat_NCDHW : format;
|
||||
auto d_type = AnfAlgo::GetInputDeviceDataType(anf_node, real_input_index);
|
||||
(*input_desc)[kJDtype] = tbe::TypeIdToString(d_type);
|
||||
(*input_desc)[kJDataType] = GetJsonValue<std::string>(*input_desc, kJDtype);
|
||||
|
|
|
@ -426,8 +426,7 @@ void TbeJsonCreator::GenDescJson(const AnfNodePtr &anf_node, size_t node_out_idx
|
|||
auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
|
||||
format = tbe::TbeAdapter::FormatPass(format, ori_shape.size());
|
||||
auto def_format = TbeJsonUtils::IsNeedChangeDefaultFormat(anf_node) ? kOpFormat_NCDHW : kOpFormat_NCHW;
|
||||
format =
|
||||
(def_format == kOpFormat_NCDHW && k3DFormatSet.find(format) == k3DFormatSet.end()) ? kOpFormat_NCDHW : format;
|
||||
format = (def_format == kOpFormat_NCDHW && !IsOneOf3DFormat(format)) ? kOpFormat_NCDHW : format;
|
||||
|
||||
(*output_desc)[kJDataType] = tbe::TypeIdToString(AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx));
|
||||
(*output_desc)[kJDtype] = GetJsonValue<std::string>(*output_desc, kJDataType);
|
||||
|
|
|
@ -70,12 +70,12 @@ std::vector<int64_t> HostCheck::GetFinalInferShape(const AnfNodePtr &node, const
|
|||
}
|
||||
|
||||
auto temp_shape = infer_shape;
|
||||
if (kNoPaddingFormatSet.find(format) == kNoPaddingFormatSet.end() && format != kOpFormat_FRACTAL_ZN_LSTM &&
|
||||
infer_shape.size() < kShape4dDims && k3DFormatSet.find(format) == k3DFormatSet.end()) {
|
||||
if (!IsOneOfNoPaddingFormat(format) && format != kOpFormat_FRACTAL_ZN_LSTM && infer_shape.size() < kShape4dDims &&
|
||||
!IsOneOf3DFormat(format)) {
|
||||
MS_LOG(DEBUG) << "Get Device Shape using a shape size is less than 4 ,should be Padding shape by Default firstly";
|
||||
temp_shape = trans::PaddingShapeTo4dDefault(infer_shape, node);
|
||||
}
|
||||
if (infer_shape.size() != kNcdhwShapeSize && k3DFormatSet.find(format) != k3DFormatSet.end()) {
|
||||
if (infer_shape.size() != kNcdhwShapeSize && IsOneOf3DFormat(format)) {
|
||||
temp_shape = trans::PaddingShapeTo5dDefault(infer_shape, node);
|
||||
}
|
||||
return temp_shape;
|
||||
|
|
|
@ -41,7 +41,6 @@ constexpr auto kPrefixOutput = "output";
|
|||
constexpr char kParamTypeDynamic[] = "dynamic";
|
||||
constexpr char kParamTypeRequre[] = "required";
|
||||
constexpr char kParamTypeOptional[] = "optional";
|
||||
mindspore::HashMap<std::string, std::vector<std::shared_ptr<KernelBuildInfo>>> TbeKernelSelect::select_cache_ = {};
|
||||
|
||||
void TbeMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
|
||||
auto tbe_selecter = TbeKernelSelect(kernel_node, kernel_info_list);
|
||||
|
@ -236,7 +235,7 @@ void TbeKernelSelect::GetAgnosticPatternKernelInfo(const OpInfo &op_info) {
|
|||
MS_LOG(EXCEPTION) << "AgnosticPattern only support one input.";
|
||||
}
|
||||
auto format = AnfAlgo::GetPrevNodeOutputFormat(cnode_ptr_, 0);
|
||||
if (kOpFormatList.find(format) == kOpFormatList.end()) {
|
||||
if (!IsOneOfFormat(format)) {
|
||||
MS_LOG(INFO) << "Got the unknown format " << format;
|
||||
format = kOpFormat_DEFAULT;
|
||||
}
|
||||
|
@ -332,7 +331,7 @@ bool TbeKernelSelect::IsShapeMatchFormat(const ShapeVector &shape, const std::st
|
|||
}
|
||||
static const std::set<std::string> kServerNotSupportFormat = {kOpFormat_NC1HWC0_C04, kOpFormat_FRACTAL_Z_C04};
|
||||
// if format is default, it remarkes support all format
|
||||
if (kOpFormatList.find(format) == kOpFormatList.end()) {
|
||||
if (!IsOneOfFormat(format)) {
|
||||
MS_LOG(EXCEPTION) << "Got the unknown format " << format;
|
||||
}
|
||||
// server not support format with C04 suffix
|
||||
|
@ -346,7 +345,7 @@ bool TbeKernelSelect::IsShapeMatchFormat(const ShapeVector &shape, const std::st
|
|||
}
|
||||
// not support format:
|
||||
// 1 3d formats with shape size > 5
|
||||
if (k3DFormatSet.find(format) != k3DFormatSet.end() && shape.size() > kShape5dDims) {
|
||||
if (IsOneOf3DFormat(format) && shape.size() > kShape5dDims) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
|
@ -80,7 +80,7 @@ class TbeKernelSelect {
|
|||
nlohmann::json kernel_json;
|
||||
std::string kernel_hash_name;
|
||||
bool check_cnode;
|
||||
static mindspore::HashMap<std::string, std::vector<std::shared_ptr<KernelBuildInfo>>> select_cache_;
|
||||
inline static mindspore::HashMap<std::string, std::vector<std::shared_ptr<KernelBuildInfo>>> select_cache_ = {};
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "include/common/utils/anfalgo.h"
|
||||
#include "include/common/utils/comm_manager.h"
|
||||
#include "include/common/utils/parallel_context.h"
|
||||
#include "runtime/graph_scheduler/graph_compiler.h"
|
||||
#include "plugin/device/ascend/hal/device/ascend_stream_assign.h"
|
||||
#include "plugin/device/ascend/optimizer/ascend_comm_op_reuse.h"
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "plugin/device/ascend/kernel/tbe/tbe_kernel_select/tbe_kernel_select.h"
|
||||
|
||||
namespace mindspore {
|
||||
const std::set<TypeId> kFloatDataTypeSet = {kNumberTypeFloat16, kNumberTypeFloat32};
|
||||
namespace opt {
|
||||
class KernelSelect {
|
||||
public:
|
||||
|
|
|
@ -32,7 +32,7 @@ const AnfNodePtr AddIoFormatAttrFor3DGraph::Process(const FuncGraphPtr &func_gra
|
|||
common::AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node);
|
||||
auto formats = AnfAlgo::GetAllOutputFormats(node);
|
||||
if (std::any_of(formats.begin(), formats.end(),
|
||||
[](const std::string &format) { return k3DFormatSet.find(format) != k3DFormatSet.end(); })) {
|
||||
[](const std::string &format) { return IsOneOf3DFormat(format); })) {
|
||||
common::AnfAlgo::SetNodeAttr(kAttrFormat, MakeValue(kOpFormat_NCDHW), node);
|
||||
}
|
||||
return node;
|
||||
|
|
|
@ -28,6 +28,9 @@
|
|||
namespace mindspore {
|
||||
namespace opt {
|
||||
namespace {
|
||||
const std::set<std::string> kDefaultCompatibleFormat = {kOpFormat_ND, kOpFormat_NCHW, kOpFormat_NHWC, kOpFormat_HWCN,
|
||||
kOpFormat_NCDHW};
|
||||
|
||||
bool CheckFormatForConsistency(const CNodePtr &node, const size_t input_index) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
// get prior node's device output format
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
|
||||
#include "plugin/device/ascend/optimizer/format_type/rectify_do_mask_kernel_info.h"
|
||||
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "kernel/kernel_build_info.h"
|
||||
|
@ -67,7 +66,7 @@ void RectifyDoMaskKernelInfo::RectifyKernelInfo(const std::vector<CNodePtr> &do_
|
|||
std::string convert_format;
|
||||
for (const auto &do_mask : do_mask_node_list) {
|
||||
auto do_mask_data_format = AnfAlgo::GetInputFormat(do_mask, 0);
|
||||
if (special_format.empty() && kHWSpecialFormatSet.find(do_mask_data_format) != kHWSpecialFormatSet.end()) {
|
||||
if (special_format.empty() && IsOneOfHWSpecialFormat(do_mask_data_format)) {
|
||||
special_format = do_mask_data_format;
|
||||
}
|
||||
if (format_counter.find(do_mask_data_format) == format_counter.end()) {
|
||||
|
@ -99,7 +98,7 @@ std::string RectifyDoMaskKernelInfo::GetConvertFormat(const std::map<std::string
|
|||
if (counter < iter.second) {
|
||||
convert_format = iter.first;
|
||||
counter = iter.second;
|
||||
} else if (counter == iter.second && kHWSpecialFormatSet.find(iter.first) != kHWSpecialFormatSet.end()) {
|
||||
} else if (counter == iter.second && IsOneOfHWSpecialFormat(iter.first)) {
|
||||
convert_format = iter.first;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,13 +39,13 @@ const AnfNodePtr TransOpFormatRefine::Process(const FuncGraphPtr &func_graph, co
|
|||
auto builder =
|
||||
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
|
||||
MS_EXCEPTION_IF_NULL(builder);
|
||||
if (in_format == kOpFormat_DEFAULT && k3DFormatSet.find(out_format) != k3DFormatSet.end()) {
|
||||
if (in_format == kOpFormat_DEFAULT && IsOneOf3DFormat(out_format)) {
|
||||
builder->SetInputsFormat({kOpFormat_NCDHW});
|
||||
builder->SetOutputsFormat({out_format});
|
||||
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
|
||||
common::AnfAlgo::SetNodeAttr(kAttrSrcFormat, MakeValue(kOpFormat_NCDHW), node);
|
||||
}
|
||||
if (out_format == kOpFormat_DEFAULT && k3DFormatSet.find(in_format) != k3DFormatSet.end()) {
|
||||
if (out_format == kOpFormat_DEFAULT && IsOneOf3DFormat(in_format)) {
|
||||
builder->SetInputsFormat({in_format});
|
||||
builder->SetOutputsFormat({kOpFormat_NCDHW});
|
||||
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
|
||||
namespace mindspore::opt {
|
||||
namespace {
|
||||
const std::set<std::string> kNodeWithSeedOperators = {kGammaOpName, kPoissonOpName, kStandardLaplaceOpName,
|
||||
kStandardNormalOpName, kUniformIntOpName, kUniformRealOpName};
|
||||
tensor::TensorPtr CreateTensor(int64_t seed) {
|
||||
// 1 create seed tensor
|
||||
std::vector<int64_t> indices_shape = {1};
|
||||
|
|
|
@ -14,10 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
#include "plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_fusion.h"
|
||||
#include "backend/common/optimizer/helper.h"
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "utils/trace_base.h"
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
const BaseRef AdamApplyOneFusion::DefinePattern() const {
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
#include "plugin/device/ascend/optimizer/ir_fusion/adam_apply_one_with_decay_rule.h"
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "ir/primitive.h"
|
||||
#include "backend/common/optimizer/helper.h"
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "include/common/utils/utils.h"
|
||||
#include "backend/common/optimizer/helper.h"
|
||||
#include "mindspore/core/ops/core_ops.h"
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "plugin/device/ascend/optimizer/ir_fusion/lamb_next_mv_with_decay_rule.h"
|
||||
#include <utility>
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "frontend/optimizer/opt.h"
|
||||
#include "utils/trace_base.h"
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "frontend/optimizer/opt.h"
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "plugin/device/ascend/optimizer/ir_fusion/lamb_next_right_rule.h"
|
||||
#include <vector>
|
||||
#include "backend/common/optimizer/helper.h"
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
#include "utils/trace_base.h"
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
#include "ir/primitive.h"
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <algorithm>
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "mindspore/core/ops/core_ops.h"
|
||||
#include "plugin/device/ascend/optimizer/ascend_helper.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
|
|
|
@ -439,7 +439,7 @@ std::pair<std::string, ExceptionType> SetKernelInfoWithMsg(const CNodePtr &kerne
|
|||
const std::string &op_name = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
if (IsPrimitiveCNode(kernel_node, prim::kPrimCustom)) {
|
||||
auto tp = common::AnfAlgo::GetNodeAttr<std::string>(kernel_node, kAttrFuncType);
|
||||
if (kCustomTypeAkg.find(tp) != kCustomTypeAkg.end()) {
|
||||
if (IsOneOfCustomAkgType(tp)) {
|
||||
UpdateCustomKernelBuildInfo(kernel_node, true);
|
||||
return {};
|
||||
}
|
||||
|
|
|
@ -345,8 +345,7 @@ bool CPUKernelExecutor::LaunchKernel(const CNodePtr &kernel, const std::vector<A
|
|||
|
||||
// Some CPU kernels can't initialize kernel and launch kernel in different thread, so reinitialize the kernels before
|
||||
// launch.
|
||||
if (kOpNotSupportMultiThreadExecList.find(common::AnfAlgo::GetCNodeName(kernel)) !=
|
||||
kOpNotSupportMultiThreadExecList.end()) {
|
||||
if (IsOneOfNotSupportMultiThreadExec(common::AnfAlgo::GetCNodeName(kernel))) {
|
||||
auto cpu_kernel_mod = dynamic_cast<kernel::DeprecatedNativeCpuKernelMod *>(kernel_mod);
|
||||
MS_EXCEPTION_IF_NULL(cpu_kernel_mod);
|
||||
cpu_kernel_mod->InitKernel(kernel);
|
||||
|
|
|
@ -89,8 +89,6 @@ std::vector<KernelAttr> NativeCpuKernelMod::GetSupportFromOpLib(const std::strin
|
|||
return support_kernel_attrs;
|
||||
}
|
||||
|
||||
mindspore::HashMap<std::string, std::vector<KernelAttr>> NativeCpuKernelMod::support_map_{};
|
||||
|
||||
int DeprecatedNativeCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
|
||||
const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs,
|
||||
|
|
|
@ -169,7 +169,7 @@ class BACKEND_EXPORT NativeCpuKernelMod : public CpuKernelMod {
|
|||
private:
|
||||
std::vector<KernelAttr> GetAllSupportedList(const std::string &kernel_name);
|
||||
std::vector<KernelAttr> GetSupportFromOpLib(const std::string &kernel_name) const;
|
||||
static mindspore::HashMap<std::string, std::vector<KernelAttr>> support_map_;
|
||||
inline static mindspore::HashMap<std::string, std::vector<KernelAttr>> support_map_;
|
||||
};
|
||||
|
||||
class BACKEND_EXPORT DeprecatedNativeCpuKernelMod : public NativeCpuKernelMod {
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
include_directories(${CUDNN_INCLUDE_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS} ${CUPTI_INCLUDE_DIRS})
|
||||
add_subdirectory(kernel/cuda_impl)
|
||||
|
||||
########### mindspore_gpu.so #####
|
||||
set(GPU_SUB_COMP
|
||||
hal/device
|
||||
hal/hardware
|
||||
hal/profiler
|
||||
kernel
|
||||
optimizer
|
||||
)
|
||||
|
||||
if(ENABLE_GPU)
|
||||
foreach(g_comp ${GPU_SUB_COMP})
|
||||
add_subdirectory(${g_comp})
|
||||
string(REPLACE "/" "_" sub ${g_comp})
|
||||
if(TARGET _mindspore_plugin_device_gpu_${sub}_obj)
|
||||
list(APPEND GPU_SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_plugin_device_gpu_${sub}_obj>)
|
||||
add_dependencies(_mindspore_plugin_device_gpu_${sub}_obj proto_input)
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
add_library(mindspore_gpu SHARED ${GPU_SUB_OBJECTS_SRC})
|
||||
target_link_libraries(mindspore_gpu PUBLIC mindspore_backend_common)
|
||||
target_link_libraries(mindspore_gpu PRIVATE mindspore_core mindspore_common proto_input mindspore::protobuf)
|
||||
target_link_libraries(mindspore_gpu PRIVATE securec)
|
||||
set_target_properties(mindspore_gpu PROPERTIES INSTALL_RPATH $ORIGIN)
|
||||
target_link_libraries(mindspore_gpu PRIVATE mindspore::dnnl mindspore::mkldnn nnacl)
|
||||
target_link_libraries(mindspore_gpu PRIVATE mindspore::ssl mindspore::crypto)
|
||||
target_link_libraries(mindspore_gpu PRIVATE mindspore::event mindspore::event_pthreads
|
||||
mindspore::event_openssl -Wl,--no-as-needed mindspore::event_core ps_cache)
|
||||
|
||||
if(ENABLE_GPU)
|
||||
message("add gpu lib to mindspore_gpu")
|
||||
target_link_libraries(mindspore_gpu PRIVATE cuda_ops
|
||||
${CUBLAS_LIBRARY_PATH}
|
||||
${CUDA_PATH}/lib64/libcurand.so
|
||||
${CUDNN_LIBRARY_PATH}
|
||||
${CUDA_PATH}/lib64/libcudart.so
|
||||
${CUDA_PATH}/lib64/stubs/libcuda.so
|
||||
${CUDA_PATH}/lib64/libcusolver.so
|
||||
${CUDA_PATH}/lib64/libcufft.so
|
||||
${CUDA_PATH}/lib64/libcusparse.so)
|
||||
endif()
|
||||
|
||||
if(ENABLE_DEBUGGER)
|
||||
# debugger: link grpc
|
||||
if(ENABLE_GPU)
|
||||
target_link_libraries(mindspore_gpu PRIVATE -Wl,--no-as-needed mindspore::grpc++)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
if(ENABLE_MPI)
|
||||
set_target_properties(_ms_mpi PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
|
||||
set_target_properties(nvidia_collective PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
|
||||
set_target_properties(gpu_collective PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
|
||||
endif()
|
||||
endif()
|
|
@ -3,11 +3,29 @@ if("${ENABLE_HIDDEN}" STREQUAL "OFF")
|
|||
string(REPLACE " -fvisibility=hidden" " -fvisibility=default" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
|
||||
list(APPEND DEVICE_SRC_LIST "ps/gpu_ps_cache.cc")
|
||||
if(ENABLE_GPU)
|
||||
list(APPEND DEVICE_SRC_LIST "distribution/collective_init.cc")
|
||||
list(APPEND DEVICE_SRC_LIST "gpu_comm_manager.cc")
|
||||
else()
|
||||
list(APPEND DEVICE_SRC_LIST "distribution/collective_fake_init.cc")
|
||||
list(APPEND DEVICE_SRC_LIST ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/common/mem_reuse/mem_reuse.cc)
|
||||
list(APPEND DEVICE_SRC_LIST ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/common/mem_reuse/mem_swap_manager.cc)
|
||||
list(APPEND DEVICE_SRC_LIST ${CMAKE_SOURCE_DIR}/mindspore/ccsrc/runtime/data_queue/data_queue.h)
|
||||
file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
list(REMOVE_ITEM DEVICE_SRC_LIST
|
||||
"mpi/mpi_initializer.cc"
|
||||
"distribution/collective_wrapper.cc"
|
||||
"distribution/mpi_wrapper.cc"
|
||||
"distribution/nccl_wrapper.cc"
|
||||
"trt_loader.cc")
|
||||
if(NOT ${TENSORRT_HOME} STREQUAL "")
|
||||
find_path(TENSORRT_HOME_INCLUDE NvInfer.h HINTS ${TENSORRT_HOME}/include)
|
||||
if(TENSORRT_HOME_INCLUDE STREQUAL TENSORRT_HOME_INCLUDE-NOTFOUND)
|
||||
message(FATAL_ERROR "Tensor-RT dir not exist ${TENSORRT_HOME}")
|
||||
endif()
|
||||
message("Enable GPU inference. Tensor-RT include dir: ${TENSORRT_HOME_INCLUDE}")
|
||||
set(ENABLE_GPU_INFER TRUE)
|
||||
add_compile_definitions(ENABLE_GPU_INFER)
|
||||
include_directories(${TENSORRT_HOME_INCLUDE})
|
||||
list(APPEND DEVICE_SRC_LIST ${CMAKE_CURRENT_SOURCE_DIR}/trt_loader.cc)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
|
@ -19,8 +37,6 @@ if(ENABLE_GPU)
|
|||
endif()
|
||||
|
||||
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" "*.cu")
|
||||
|
||||
#set(GPU_QUEUE_SRCS "blocking_queue.cc" "gpu_buffer_mgr.cc" "data_queue.cc")
|
||||
set(GPU_COLLECTIVE_SRCS "distribution/collective_wrapper.cc"
|
||||
"distribution/mpi_wrapper.cc"
|
||||
"distribution/nccl_wrapper.cc")
|
||||
|
@ -36,8 +52,6 @@ if(ENABLE_GPU)
|
|||
target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
|
||||
target_link_libraries(_ms_mpi PRIVATE gpu_collective)
|
||||
endif()
|
||||
|
||||
# add_library(_mindspore_device_cuda_obj OBJECT ${CUDA_SRC_LIST})
|
||||
endif()
|
||||
|
||||
set_property(SOURCE ${DEVICE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||
|
|
|
@ -24,10 +24,11 @@
|
|||
#include <functional>
|
||||
#include "runtime/data_queue/data_queue.h"
|
||||
#include "runtime/hardware/device_context_manager.h"
|
||||
#include "include/backend/visible.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
class GpuDataQueueDynamic : public DataQueue {
|
||||
class BACKEND_EXPORT GpuDataQueueDynamic : public DataQueue {
|
||||
public:
|
||||
explicit GpuDataQueueDynamic(const size_t capacity);
|
||||
virtual ~GpuDataQueueDynamic() = default;
|
||||
|
@ -49,7 +50,7 @@ class GpuDataQueueDynamic : public DataQueue {
|
|||
std::unique_ptr<NodeInfo[]> node_info_;
|
||||
};
|
||||
|
||||
class GpuQueue : public DataQueue {
|
||||
class BACKEND_EXPORT GpuQueue : public DataQueue {
|
||||
public:
|
||||
GpuQueue(void *addr, const std::vector<size_t> &shape, const size_t &capacity);
|
||||
virtual ~GpuQueue();
|
||||
|
|
|
@ -193,7 +193,7 @@ bool SelectCustomKernel(const CNodePtr &kernel_node, const std::shared_ptr<Kerne
|
|||
kernel::Factory<kernel::NativeGpuKernelMod>::Instance().Register(
|
||||
op_name, []() { return std::make_shared<kernel::CustomAOTGpuKernelMod>(); });
|
||||
}
|
||||
} else if (kCustomTypeAkg.find(func_type) != kCustomTypeAkg.end()) {
|
||||
} else if (IsOneOfCustomAkgType(func_type)) {
|
||||
*kernel_type = KernelType::AKG_KERNEL;
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Unsupported func type [" << func_type << "] for Custom op [" << op_name << "] on GPU";
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ps/ps_cache/gpu/gpu_ps_cache.h"
|
||||
#include "plugin/device/gpu/hal/device/ps/gpu_ps_cache.h"
|
||||
#include "ps/ps_cache/ps_cache_factory.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/hash_impl.cuh"
|
||||
#include "plugin/device/gpu/hal/device/gpu_common.h"
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_PS_PS_CACHE_GPU_GPU_PS_CACHE_H_
|
||||
#define MINDSPORE_CCSRC_PS_PS_CACHE_GPU_GPU_PS_CACHE_H_
|
||||
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_
|
||||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <memory>
|
||||
|
@ -47,4 +47,4 @@ class GPUPsCache : public PsCacheBasic {
|
|||
} // namespace gpu
|
||||
} // namespace ps
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_PS_PS_CACHE_GPU_GPU_PS_CACHE_H_
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_HAL_DEVICE_PS_GPU_PS_CACHE_H_
|
|
@ -659,8 +659,7 @@ KernelGraphPtr GPUSession::BuildOpImpl(const BackendOpRunInfoPtr &op_run_info, c
|
|||
const std::vector<int64_t> &tensors_mask) {
|
||||
// Check if the graph cache exists.
|
||||
auto it = run_op_graphs_.find(graph_info);
|
||||
if (it != run_op_graphs_.end() &&
|
||||
kOpCacheBlackList.find(op_run_info->base_op_run_info.op_name) == kOpCacheBlackList.end()) {
|
||||
if (it != run_op_graphs_.end() && !IsOneOfCacheBlackList(op_run_info->base_op_run_info.op_name)) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
|
@ -718,7 +717,7 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, const BackendOpRunInfoPt
|
|||
UpdateOutputAbstract(kernel_graph, op_run_info);
|
||||
}
|
||||
RunOpClearMemory(kernel_graph.get());
|
||||
if (kOpCacheBlackList.find(op_run_info->base_op_run_info.op_name) != kOpCacheBlackList.end()) {
|
||||
if (IsOneOfCacheBlackList(op_run_info->base_op_run_info.op_name)) {
|
||||
run_op_graphs_.erase(graph_info);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,8 +3,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
|||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cu")
|
||||
|
||||
file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
file(GLOB_RECURSE _AKG_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "akg/*.cc")
|
||||
list(REMOVE_ITEM GPU_SRC_LIST ${_AKG_SRC_LIST})
|
||||
|
|
|
@ -12,11 +12,12 @@ endif()
|
|||
|
||||
set_property(SOURCE ${CUDA_OPS_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL)
|
||||
if(ENABLE_GPU)
|
||||
add_library(cuda_common_obj OBJECT cuda_common.cc cuda_device_info.cc)
|
||||
add_library(cuda_common_obj OBJECT cuda_ops/cuda_common.cc cuda_ops/cuda_device_info.cc)
|
||||
target_compile_options(cuda_common_obj PRIVATE "-std=c++17")
|
||||
cuda_add_library(cuda_ops SHARED ${CUDA_OPS_SRC_LIST} $<TARGET_OBJECTS:cuda_common_obj>)
|
||||
message("add gpu lib to cuda_ops")
|
||||
target_link_libraries(cuda_ops mindspore_core cublas
|
||||
target_link_libraries(cuda_ops mindspore_core
|
||||
${CUBLAS_LIBRARY_PATH}
|
||||
${CUDA_PATH}/lib64/libcurand.so
|
||||
${CUDNN_LIBRARY_PATH}
|
||||
${CUDA_PATH}/lib64/libcudart.so
|
|
@ -69,10 +69,11 @@ __global__ void NormCalHighPrecisionKernel(const float *middle_output, T *output
|
|||
}
|
||||
|
||||
template <>
|
||||
void CalLpNorm<float>(const float *input, const size_t *input_shape, size_t input_shape_length, size_t input_elements,
|
||||
const size_t *output_axis, const size_t *output_stride, size_t output_shape_length,
|
||||
size_t output_elements, float p, float eps, float *middle_output, float *output,
|
||||
const uint32_t &device_id, cudaStream_t cuda_stream) {
|
||||
CUDA_LIB_EXPORT void CalLpNorm<float>(const float *input, const size_t *input_shape, size_t input_shape_length,
|
||||
size_t input_elements, const size_t *output_axis, const size_t *output_stride,
|
||||
size_t output_shape_length, size_t output_elements, float p, float eps,
|
||||
float *middle_output, float *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream) {
|
||||
LpCalKernel<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, input_shape, input_shape_length, input_elements, output_axis, output_stride, output_shape_length, p, eps,
|
||||
output);
|
||||
|
@ -81,10 +82,11 @@ void CalLpNorm<float>(const float *input, const size_t *input_shape, size_t inpu
|
|||
}
|
||||
|
||||
template <>
|
||||
void CalLpNorm<half>(const half *input, const size_t *input_shape, size_t input_shape_length, size_t input_elements,
|
||||
const size_t *output_axis, const size_t *output_stride, size_t output_shape_length,
|
||||
size_t output_elements, float p, float eps, float *middle_output, half *output,
|
||||
const uint32_t &device_id, cudaStream_t cuda_stream) {
|
||||
CUDA_LIB_EXPORT void CalLpNorm<half>(const half *input, const size_t *input_shape, size_t input_shape_length,
|
||||
size_t input_elements, const size_t *output_axis, const size_t *output_stride,
|
||||
size_t output_shape_length, size_t output_elements, float p, float eps,
|
||||
float *middle_output, half *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream) {
|
||||
LpCalKernel<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, input_shape, input_shape_length, input_elements, output_axis, output_stride, output_shape_length, p, eps,
|
||||
middle_output);
|
||||
|
|
|
@ -133,9 +133,9 @@ __global__ void MultiMarginLoss_backward_kernel_half(half *gradInput, const half
|
|||
|
||||
// namespace str
|
||||
template <typename T>
|
||||
void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, int dim, const T *output_grad,
|
||||
const T *input, const int64_t *target, const T *weight, T *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream) {
|
||||
CUDA_LIB_EXPORT void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, int dim,
|
||||
const T *output_grad, const T *input, const int64_t *target, const T *weight,
|
||||
T *output, const uint32_t &device_id, cudaStream_t cuda_stream) {
|
||||
dim3 blocks1(nframe);
|
||||
dim3 threads1(MULTIMARGIN_THREADS);
|
||||
bool reduce = false;
|
||||
|
@ -158,9 +158,10 @@ void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe,
|
|||
|
||||
// namespace str
|
||||
template <>
|
||||
void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, int dim, const half *output_grad,
|
||||
const half *input, const int64_t *target, const half *weight, half *output,
|
||||
const uint32_t &device_id, cudaStream_t cuda_stream) {
|
||||
CUDA_LIB_EXPORT void MultiMarginLossGrad(int64_t p, float margin, int64_t reduction, int nframe, int dim,
|
||||
const half *output_grad, const half *input, const int64_t *target,
|
||||
const half *weight, half *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream) {
|
||||
dim3 blocks1(nframe);
|
||||
dim3 threads1(MULTIMARGIN_THREADS);
|
||||
bool reduce = false;
|
||||
|
|
|
@ -286,9 +286,9 @@ __global__ void MultiMarginLossReduceKernel(int dim, T *output) {
|
|||
|
||||
// namespace str
|
||||
template <typename T>
|
||||
void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int dim, const T *input,
|
||||
const int64_t *target, const T *weight, T *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream) {
|
||||
CUDA_LIB_EXPORT void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int dim, const T *input,
|
||||
const int64_t *target, const T *weight, T *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream) {
|
||||
dim3 blocks(nframe);
|
||||
dim3 threads(MULTIMARGIN_THREADS);
|
||||
bool sizeAverage = false;
|
||||
|
@ -311,9 +311,9 @@ void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int
|
|||
|
||||
// namespace str
|
||||
template <>
|
||||
void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int dim, const half *input,
|
||||
const int64_t *target, const half *weight, half *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream) {
|
||||
CUDA_LIB_EXPORT void MultiMarginLoss(int64_t p, float margin, int64_t reduction, int nframe, int dim, const half *input,
|
||||
const int64_t *target, const half *weight, half *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream) {
|
||||
dim3 blocks(nframe);
|
||||
dim3 threads(128);
|
||||
bool sizeAverage = false;
|
||||
|
|
|
@ -58,8 +58,7 @@ __global__ void CalNormValFun1(const Complex<double> *input, size_t input_elemen
|
|||
}
|
||||
|
||||
__global__ void CalNormValFun2(float *norm_value, int p, size_t axis_size, float max_norm) {
|
||||
for (size_t index = blockIdx.x * blockDim.x + threadIdx.x; index < (axis_size);
|
||||
index += blockDim.x * gridDim.x) {
|
||||
for (size_t index = blockIdx.x * blockDim.x + threadIdx.x; index < (axis_size); index += blockDim.x * gridDim.x) {
|
||||
float temp = pow(norm_value[index], static_cast<float>(1.0 / p));
|
||||
if (temp > max_norm) {
|
||||
norm_value[index] = max_norm / temp;
|
||||
|
@ -84,61 +83,61 @@ __global__ void CalNormValFun3(const T *input, size_t inner_size, size_t axis_si
|
|||
}
|
||||
|
||||
template <>
|
||||
void CalRenorm<half>(const half *input, size_t input_elements, size_t inner_size, size_t axis_size, int p,
|
||||
float *norm_value, half *output, const uint32_t &device_id, cudaStream_t cuda_stream,
|
||||
float max_norm) {
|
||||
CUDA_LIB_EXPORT void CalRenorm<half>(const half *input, size_t input_elements, size_t inner_size, size_t axis_size,
|
||||
int p, float *norm_value, half *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream, float max_norm) {
|
||||
CalNormValFun1<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, input_elements, inner_size, axis_size, p, norm_value);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
norm_value, p, axis_size, max_norm);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(norm_value, p,
|
||||
axis_size, max_norm);
|
||||
CalNormValFun3<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, inner_size, axis_size, input_elements, output, norm_value);
|
||||
}
|
||||
|
||||
template <>
|
||||
void CalRenorm<float>(const float *input, size_t input_elements, size_t inner_size, size_t axis_size, int p,
|
||||
float *norm_value, float *output, const uint32_t &device_id, cudaStream_t cuda_stream,
|
||||
float max_norm) {
|
||||
CUDA_LIB_EXPORT void CalRenorm<float>(const float *input, size_t input_elements, size_t inner_size, size_t axis_size,
|
||||
int p, float *norm_value, float *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream, float max_norm) {
|
||||
CalNormValFun1<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, input_elements, inner_size, axis_size, p, norm_value);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
norm_value, p, axis_size, max_norm);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(norm_value, p,
|
||||
axis_size, max_norm);
|
||||
CalNormValFun3<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, inner_size, axis_size, input_elements, output, norm_value);
|
||||
}
|
||||
|
||||
template <>
|
||||
void CalRenorm<double>(const double *input, size_t input_elements, size_t inner_size, size_t axis_size, int p,
|
||||
float *norm_value, double *output, const uint32_t &device_id, cudaStream_t cuda_stream,
|
||||
float max_norm) {
|
||||
CUDA_LIB_EXPORT void CalRenorm<double>(const double *input, size_t input_elements, size_t inner_size, size_t axis_size,
|
||||
int p, float *norm_value, double *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream, float max_norm) {
|
||||
CalNormValFun1<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, input_elements, inner_size, axis_size, p, norm_value);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
norm_value, p, axis_size, max_norm);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(norm_value, p,
|
||||
axis_size, max_norm);
|
||||
CalNormValFun3<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, inner_size, axis_size, input_elements, output, norm_value);
|
||||
}
|
||||
|
||||
template <>
|
||||
void CalRenorm<Complex<float>>(const Complex<float> *input, size_t input_elements, size_t inner_size, size_t axis_size,
|
||||
int p, float *norm_value, Complex<float> *output, const uint32_t &device_id,
|
||||
cudaStream_t cuda_stream, float max_norm) {
|
||||
CUDA_LIB_EXPORT void CalRenorm<Complex<float>>(const Complex<float> *input, size_t input_elements, size_t inner_size,
|
||||
size_t axis_size, int p, float *norm_value, Complex<float> *output,
|
||||
const uint32_t &device_id, cudaStream_t cuda_stream, float max_norm) {
|
||||
CalNormValFun1<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, input_elements, inner_size, axis_size, p, norm_value);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
norm_value, p, axis_size, max_norm);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(norm_value, p,
|
||||
axis_size, max_norm);
|
||||
CalNormValFun3<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, inner_size, axis_size, input_elements, output, norm_value);
|
||||
}
|
||||
|
||||
template <>
|
||||
void CalRenorm<Complex<double>>(const Complex<double> *input, size_t input_elements, size_t inner_size,
|
||||
size_t axis_size, int p, float *norm_value, Complex<double> *output,
|
||||
const uint32_t &device_id, cudaStream_t cuda_stream, float max_norm) {
|
||||
CUDA_LIB_EXPORT void CalRenorm<Complex<double>>(const Complex<double> *input, size_t input_elements, size_t inner_size,
|
||||
size_t axis_size, int p, float *norm_value, Complex<double> *output,
|
||||
const uint32_t &device_id, cudaStream_t cuda_stream, float max_norm) {
|
||||
CalNormValFun1<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, input_elements, inner_size, axis_size, p, norm_value);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
norm_value, p, axis_size, max_norm);
|
||||
CalNormValFun2<<<CUDA_BLOCKS(device_id, axis_size), CUDA_THREADS(device_id), 0, cuda_stream>>>(norm_value, p,
|
||||
axis_size, max_norm);
|
||||
CalNormValFun3<<<CUDA_BLOCKS(device_id, input_elements), CUDA_THREADS(device_id), 0, cuda_stream>>>(
|
||||
input, inner_size, axis_size, input_elements, output, norm_value);
|
||||
}
|
||||
|
|
|
@ -28,16 +28,16 @@ __global__ void ScaleGrad(const int nums, const T *x0, const S &x1, T *y) {
|
|||
}
|
||||
|
||||
template <typename T, typename S>
|
||||
void ScaleGradKernel(const int &nums, const T *x0, const S &x1, T *y, cudaStream_t stream) {
|
||||
CUDA_LIB_EXPORT void ScaleGradKernel(const int &nums, const T *x0, const S &x1, T *y, cudaStream_t stream) {
|
||||
ScaleGrad<<<(nums + 255) / 256, 256, 0, stream>>>(nums, x0, x1, y);
|
||||
return;
|
||||
}
|
||||
|
||||
template void ScaleGradKernel<float, float>(const int &nums, const float *x0, const float &x1, float *y,
|
||||
cudaStream_t stream);
|
||||
template void ScaleGradKernel<float, half>(const int &nums, const float *x0, const half &x1, float *y,
|
||||
cudaStream_t stream);
|
||||
template void ScaleGradKernel<half, float>(const int &nums, const half *x0, const float &x1, half *y,
|
||||
cudaStream_t stream);
|
||||
template void ScaleGradKernel<half, half>(const int &nums, const half *x0, const half &x1, half *y,
|
||||
cudaStream_t stream);
|
||||
template CUDA_LIB_EXPORT void ScaleGradKernel<float, float>(const int &nums, const float *x0, const float &x1, float *y,
|
||||
cudaStream_t stream);
|
||||
template CUDA_LIB_EXPORT void ScaleGradKernel<float, half>(const int &nums, const float *x0, const half &x1, float *y,
|
||||
cudaStream_t stream);
|
||||
template CUDA_LIB_EXPORT void ScaleGradKernel<half, float>(const int &nums, const half *x0, const float &x1, half *y,
|
||||
cudaStream_t stream);
|
||||
template CUDA_LIB_EXPORT void ScaleGradKernel<half, half>(const int &nums, const half *x0, const half &x1, half *y,
|
||||
cudaStream_t stream);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_SCALE_GRAD_IMPL_H_
|
||||
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
template <typename T, typename S>
|
||||
void ScaleGradKernel(const int &nums, const T *x0, const S &x1, T *y, cudaStream_t stream);
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPLIT_H_
|
||||
|
|
|
@ -39,9 +39,9 @@ __global__ void DiscountedReturnKernel(const int timestep, const int num_env, co
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
void DiscountedReturn(const int ×tep, const int &num_env, const int &num_element, const float &gamma,
|
||||
const T *reward, const bool *done, const T *last_value, T *discouted_return,
|
||||
cudaStream_t stream) {
|
||||
CUDA_LIB_EXPORT void DiscountedReturn(const int ×tep, const int &num_env, const int &num_element,
|
||||
const float &gamma, const T *reward, const bool *done, const T *last_value,
|
||||
T *discouted_return, cudaStream_t stream) {
|
||||
// Every block process M element, 256 is a common tile size.
|
||||
const int element_per_step = num_env * num_element;
|
||||
const int element_per_block = std::min(256, element_per_step);
|
||||
|
@ -51,9 +51,9 @@ void DiscountedReturn(const int ×tep, const int &num_env, const int &num_el
|
|||
done, last_value, discouted_return);
|
||||
}
|
||||
|
||||
template void DiscountedReturn(const int ×tep, const int &num_env, const int &num_element, const float &gamma,
|
||||
const float *reward, const bool *done, const float *last_value, float *discouted_return,
|
||||
cudaStream_t stream);
|
||||
template void DiscountedReturn(const int ×tep, const int &num_env, const int &num_element, const float &gamma,
|
||||
const half *reward, const bool *done, const half *last_value, half *discouted_return,
|
||||
cudaStream_t stream);
|
||||
template CUDA_LIB_EXPORT void DiscountedReturn(const int ×tep, const int &num_env, const int &num_element,
|
||||
const float &gamma, const float *reward, const bool *done,
|
||||
const float *last_value, float *discouted_return, cudaStream_t stream);
|
||||
template CUDA_LIB_EXPORT void DiscountedReturn(const int ×tep, const int &num_env, const int &num_element,
|
||||
const float &gamma, const half *reward, const bool *done,
|
||||
const half *last_value, half *discouted_return, cudaStream_t stream);
|
||||
|
|
|
@ -16,8 +16,9 @@
|
|||
|
||||
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_DISCONTED_RETURN_IMPL_H_
|
||||
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_DISCONTED_RETURN_IMPL_H_
|
||||
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
template <typename T>
|
||||
void DiscountedReturn(const int ×tep, const int &num_env, const int &num_element, const float &gamma,
|
||||
const T *reward, const bool *done, const T *last_value, T *discouted_return, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void DiscountedReturn(const int ×tep, const int &num_env, const int &num_element,
|
||||
const float &gamma, const T *reward, const bool *done, const T *last_value,
|
||||
T *discouted_return, cudaStream_t stream);
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_DISCONTED_RETURN_IMPL_H_
|
||||
|
|
|
@ -18,21 +18,22 @@
|
|||
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMP_PRIORITY_REPLAY_BUFFER_IMPL_H_
|
||||
|
||||
#include <curand_kernel.h>
|
||||
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
struct SumTree {
|
||||
float sum;
|
||||
float min;
|
||||
};
|
||||
|
||||
void SumTreeInit(SumTree *tree, float *max_priority, const size_t &capacity, cudaStream_t stream);
|
||||
void InitRandState(const size_t &batch_size, const uint64_t &seed, curandState *state, cudaStream_t stream);
|
||||
void SumTreePush(SumTree *tree, const float &alpha, const size_t &idx, const size_t &capacity, float *priority,
|
||||
float *max_priority, cudaStream_t stream);
|
||||
void SumTreeSample(SumTree *tree, curandState *state, const size_t &capacity, float *beta, const size_t &batch_size,
|
||||
size_t *indices, float *weights, cudaStream_t stream);
|
||||
void SumTreeUpdate(SumTree *tree, const size_t &capacity, const float &alpha, float *max_priority, size_t *indices,
|
||||
float *priorities, const size_t &batch_size, cudaStream_t stream);
|
||||
void FifoSlice(const uint8_t *input, const size_t *indice, uint8_t *output, size_t batch_size, size_t column,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void SumTreeInit(SumTree *tree, float *max_priority, const size_t &capacity, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void InitRandState(const size_t &batch_size, const uint64_t &seed, curandState *state,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void SumTreePush(SumTree *tree, const float &alpha, const size_t &idx, const size_t &capacity,
|
||||
float *priority, float *max_priority, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void SumTreeSample(SumTree *tree, curandState *state, const size_t &capacity, float *beta,
|
||||
const size_t &batch_size, size_t *indices, float *weights, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void SumTreeUpdate(SumTree *tree, const size_t &capacity, const float &alpha, float *max_priority,
|
||||
size_t *indices, float *priorities, const size_t &batch_size, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void FifoSlice(const uint8_t *input, const size_t *indice, uint8_t *output, size_t batch_size,
|
||||
size_t column, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMP_PRIORITY_REPLAY_BUFFER_IMPL_H_
|
||||
|
|
|
@ -18,21 +18,24 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_RL_BUFFER_IMPL_H_
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
void BufferAppend(const int64_t capacity, const size_t size, const int *index, const int exp_batch,
|
||||
unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream);
|
||||
void IncreaseCount(const int64_t capacity, const int exp_batch, int *count, int *head, int *index,
|
||||
cudaStream_t cuda_stream);
|
||||
void ReMappingIndex(const int *count, const int *head, const int *origin_index, int *index, cudaStream_t cuda_stream);
|
||||
void BufferGetItem(const size_t size, const int *index, const size_t one_exp_len, const unsigned char *buffer,
|
||||
unsigned char *out, cudaStream_t cuda_stream);
|
||||
void CheckBatchSize(const int *count, const int *head, const size_t batch_size, const int64_t capacity,
|
||||
cudaStream_t cuda_stream);
|
||||
void BufferSample(const size_t size, const size_t one_element, const unsigned int *index, const unsigned char *buffer,
|
||||
unsigned char *out, cudaStream_t cuda_stream);
|
||||
void RandomGen(const int size, curandState *globalState, unsigned int *value, unsigned int *key, cudaStream_t stream);
|
||||
void RandInit(const int size, const int seed, curandState *state, cudaStream_t stream);
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
CUDA_LIB_EXPORT void BufferAppend(const int64_t capacity, const size_t size, const int *index, const int exp_batch,
|
||||
unsigned char *buffer, const unsigned char *exp, cudaStream_t cuda_stream);
|
||||
CUDA_LIB_EXPORT void IncreaseCount(const int64_t capacity, const int exp_batch, int *count, int *head, int *index,
|
||||
cudaStream_t cuda_stream);
|
||||
CUDA_LIB_EXPORT void ReMappingIndex(const int *count, const int *head, const int *origin_index, int *index,
|
||||
cudaStream_t cuda_stream);
|
||||
CUDA_LIB_EXPORT void BufferGetItem(const size_t size, const int *index, const size_t one_exp_len,
|
||||
const unsigned char *buffer, unsigned char *out, cudaStream_t cuda_stream);
|
||||
CUDA_LIB_EXPORT void CheckBatchSize(const int *count, const int *head, const size_t batch_size, const int64_t capacity,
|
||||
cudaStream_t cuda_stream);
|
||||
CUDA_LIB_EXPORT void BufferSample(const size_t size, const size_t one_element, const unsigned int *index,
|
||||
const unsigned char *buffer, unsigned char *out, cudaStream_t cuda_stream);
|
||||
CUDA_LIB_EXPORT void RandomGen(const int size, curandState *globalState, unsigned int *value, unsigned int *key,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void RandInit(const int size, const int seed, curandState *state, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void RandomGenUniform(const int size, curandState *globalState, const int up_bound, T *indexes,
|
||||
cudaStream_t cuda_stream);
|
||||
CUDA_LIB_EXPORT void RandomGenUniform(const int size, curandState *globalState, const int up_bound, T *indexes,
|
||||
cudaStream_t cuda_stream);
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_ADAM_IMPL_H_
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_
|
||||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
constexpr int kFeatureNum = 4;
|
||||
constexpr int kPartiallyObsFeatureNum = 6;
|
||||
|
@ -46,14 +47,16 @@ struct AgentState {
|
|||
int *time_step;
|
||||
};
|
||||
|
||||
void InitEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *state,
|
||||
cudaStream_t stream);
|
||||
void ResetEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state, float *state,
|
||||
cudaStream_t stream);
|
||||
void StepBindBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
|
||||
const int *action, float *state, float *reward, bool *done, cudaStream_t stream);
|
||||
void StepCrossBlock(const int env_num, const int agent_num, const GameSetting *setting, AgentState *agent_state,
|
||||
const int *action, float *state, float *reward, bool *done, float *team_reward, int *distance,
|
||||
cudaStream_t stream);
|
||||
void AgentStateCopy(const int env_num, const int agent_num, AgentState *dst, AgentState *src, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void InitEnv(const int env_num, const int agent_num, const GameSetting *setting, AgentState *state,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void ResetEnv(const int env_num, const int agent_num, const GameSetting *setting,
|
||||
AgentState *agent_state, float *state, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void StepBindBlock(const int env_num, const int agent_num, const GameSetting *setting,
|
||||
AgentState *agent_state, const int *action, float *state, float *reward, bool *done,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void StepCrossBlock(const int env_num, const int agent_num, const GameSetting *setting,
|
||||
AgentState *agent_state, const int *action, float *state, float *reward, bool *done,
|
||||
float *team_reward, int *distance, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void AgentStateCopy(const int env_num, const int agent_num, AgentState *dst, AgentState *src,
|
||||
cudaStream_t stream);
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMP_TAG_ENV_IMPL_H_
|
||||
|
|
|
@ -19,8 +19,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
|
||||
const float *angle_theta0, float *ene, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void AngleAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
|
||||
const float *angle_theta0, float *ene, cudaStream_t stream);
|
||||
#endif
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a, const int *atom_b,
|
||||
const int *atom_c, const float *angle_k, const float *angle_theta0, float *ene, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void AngleEnergy(int angle_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
|
||||
const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0,
|
||||
float *ene, cudaStream_t stream);
|
||||
#endif
|
||||
|
|
|
@ -19,8 +19,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
|
||||
const int *atom_b, const int *atom_c, const float *angle_k, const float *angle_theta0, float *frc_f,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void AngleForce(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
|
||||
const float *angle_theta0, float *frc_f, cudaStream_t stream);
|
||||
#endif
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const float *angle_k,
|
||||
const float *angle_theta0, float *frc_f, float *ene, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void AngleForceWithAtomEnergy(int angle_numbers, int atom_numbers, const int *uint_crd_f,
|
||||
const float *scaler_f, const int *atom_a, const int *atom_b,
|
||||
const int *atom_c, const float *angle_k, const float *angle_theta0,
|
||||
float *frc_f, float *ene, cudaStream_t stream);
|
||||
#endif
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
|
||||
const int *atom_b, const float *bond_k, const float *bond_r0, float *atom_ene, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void BondAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
|
||||
float *atom_ene, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_ATOM_ENERGY_GPU_IMPL_H_
|
||||
|
|
|
@ -19,9 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
|
||||
float *bond_ene, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void BondEnergy(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
|
||||
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
|
||||
const float *bond_r0, float *bond_ene, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_ENERGY_CUDA_GPU_IMPL_H_
|
||||
|
|
|
@ -19,8 +19,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
|
||||
float *frc_f, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void BondForce(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
|
||||
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
|
||||
const float *bond_r0, float *frc_f, cudaStream_t stream);
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_BOND_FORCE_CUDA_GPU_IMPL_H_
|
||||
|
|
|
@ -19,9 +19,11 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers, const unsigned int *uint_crd_f,
|
||||
const float *scaler_f, const int *atom_a, const int *atom_b, const float *bond_k,
|
||||
const float *bond_r0, float *frc_f, float *atom_energy, float *atom_v,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void BondForceWithAtomEnergyAndVirial(int bond_numbers, int atom_numbers,
|
||||
const unsigned int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const float *bond_k,
|
||||
const float *bond_r0, float *frc_f, float *atom_energy,
|
||||
float *atom_v, cudaStream_t stream);
|
||||
#endif
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
|
||||
float *frc_f, float *atom_e, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void BondForceWithAtomEnergy(int bond_numbers, int atom_numbers, const int *uint_crd_f,
|
||||
const float *scaler_f, const int *atom_a, const int *atom_b,
|
||||
const float *bond_k, const float *bond_r0, float *frc_f, float *atom_e,
|
||||
cudaStream_t stream);
|
||||
#endif
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const float *bond_k, const float *bond_r0,
|
||||
float *frc_f, float *atom_v, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void BondForceWithAtomVirial(int bond_numbers, int atom_numbers, const int *uint_crd_f,
|
||||
const float *scaler_f, const int *atom_a, const int *atom_b,
|
||||
const float *bond_k, const float *bond_r0, float *frc_f, float *atom_v,
|
||||
cudaStream_t stream);
|
||||
#endif
|
||||
|
|
|
@ -18,9 +18,10 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_
|
||||
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f,
|
||||
const float *old_crd, float *nowarp_crd, int *box_map_times, float *box, float *g_radial,
|
||||
float *g_angular, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void AtomCrdToCV(int atom_numbers, int start_serial, int end_serial, int number, const float *crd_f,
|
||||
const float *old_crd, float *nowarp_crd, int *box_map_times, float *box,
|
||||
float *g_radial, float *g_angular, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_ATOMCRDTOCV_IMPL_H_
|
||||
|
|
|
@ -19,8 +19,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
|
||||
unsigned int *uint_crd_f, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void CrdToUintCrd(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
|
||||
unsigned int *uint_crd_f, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_IMPL_H_
|
||||
|
|
|
@ -19,8 +19,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
|
||||
unsigned int *uint_crd_f, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void CrdToUintCrdQuarter(const int atom_numbers, const float *crd_to_uint_crd_cof_f, const float *crd_f,
|
||||
unsigned int *uint_crd_f, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRD_TO_UINT_CRD_QUARTER_IMPL_H_
|
||||
|
|
|
@ -19,8 +19,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
|
||||
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void GetCenterOfMass(int residue_numbers, int *start, int *end, float *crd_f, float *atom_mass,
|
||||
float *residue_mass_inverse, float *center_of_mass_f, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse, const int *center_atoms,
|
||||
const float *crd_f, float *center_of_geometry_f, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void GetCenterOfGeometry(const int center_numbers, float center_numbers_inverse,
|
||||
const int *center_atoms, const float *crd_f, float *center_of_geometry_f,
|
||||
cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_GETCENTER_IMPL_H_
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f,
|
||||
float *box_length_f, float *no_wrap_crd_f, float *crd_f, float* scaler, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void MapCenterOfMass(int residue_numbers, int *start, int *end, float *center_of_mass_f,
|
||||
float *box_length_f, float *no_wrap_crd_f, float *crd_f, float *scaler,
|
||||
cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MAPCENTEROFMASS_IMPL_H_
|
||||
|
|
|
@ -19,7 +19,8 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
|
||||
const float *atom_mass, float *ek, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void MDTemperature(const int residue_numbers, const int *start, const int *end, const float *atom_vel_f,
|
||||
const float *atom_mass, float *ek, cudaStream_t stream);
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_MDTEMPERATURE_IMPL_H_
|
||||
|
|
|
@ -18,7 +18,9 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_
|
||||
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void total_c6_get(int atom_numbers, int *atom_lj_type, float *d_lj_b, float *d_factor,
|
||||
cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_COMMON_TOTAL_C6_GET_IMPL_H_
|
||||
|
|
|
@ -20,8 +20,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f, float *nowrap_crd_f,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void calculatenowrapcrd(int atom_numbers, int *box_map_times_f, float *box_f, float *crd_f,
|
||||
float *nowrap_crd_f, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_CAL_NO_WRAP_CRD_IMPL_H_
|
||||
|
|
|
@ -20,8 +20,9 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse, float *crd_f, float *old_crd_f,
|
||||
int *box_map_times_f, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void refresh_boxmaptimes(int atom_numbers, float *box_length_inverse, float *crd_f, float *old_crd_f,
|
||||
int *box_map_times_f, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_SPONGE_CRDMCMAP_REFRESH_BOXMAPTIMES_IMPL_H_
|
||||
|
|
|
@ -19,9 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn,
|
||||
const float *pk, const float *gamc, const float *gams, const float *pn, float *ene,
|
||||
cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void DihedralAtomEnergy(int dihedral_numbers, int atom_numbers, const int *uint_crd_f,
|
||||
const float *scaler_f, const int *atom_a, const int *atom_b, const int *atom_c,
|
||||
const int *atom_d, const int *ipn, const float *pk, const float *gamc,
|
||||
const float *gams, const float *pn, float *ene, cudaStream_t stream);
|
||||
#endif
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <curand_kernel.h>
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
|
||||
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
|
||||
|
||||
void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f, const int *atom_a,
|
||||
const int *atom_b, const int *atom_c, const int *atom_d, const int *ipn, const float *pk,
|
||||
const float *gamc, const float *gams, const float *pn, float *ene, cudaStream_t stream);
|
||||
CUDA_LIB_EXPORT void DihedralEnergy(int dihedral_numbers, const int *uint_crd_f, const float *scaler_f,
|
||||
const int *atom_a, const int *atom_b, const int *atom_c, const int *atom_d,
|
||||
const int *ipn, const float *pk, const float *gamc, const float *gams,
|
||||
const float *pn, float *ene, cudaStream_t stream);
|
||||
#endif
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue