forked from mindspore-Ecosystem/mindspore
syn code for 0715
This commit is contained in:
commit
f4cb445ea8
|
@ -17,6 +17,10 @@ else()
|
|||
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
|
||||
endif()
|
||||
|
||||
if (ENABLE_PYTHON)
|
||||
add_compile_definitions(ENABLE_PYTHON)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")
|
||||
|
|
16
RELEASE.md
16
RELEASE.md
|
@ -70,6 +70,22 @@ Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wo
|
|||
|
||||
Contributions of any kind are welcome!
|
||||
|
||||
# Release 0.3.1-alpha
|
||||
|
||||
## Major Features and Improvements
|
||||
|
||||
### Ascend 910 Training and Inference Framework
|
||||
* Frontend and User Interface
|
||||
* Independent model init interface.
|
||||
* Data processing, augmentation, and save format
|
||||
* Support sample padding for MindDataset.
|
||||
|
||||
## Bugfixes
|
||||
* Python API
|
||||
* Fix bugs in the LARS optimizer ([!1894](https://gitee.com/mindspore/mindspore/pulls/1894))
|
||||
* Data processing
|
||||
* Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340))
|
||||
|
||||
# Release 0.3.0-alpha
|
||||
|
||||
## Major Features and Improvements
|
||||
|
|
23
build.sh
23
build.sh
|
@ -24,8 +24,8 @@ usage()
|
|||
{
|
||||
echo "Usage:"
|
||||
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
|
||||
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
|
||||
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
|
||||
echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
|
||||
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -d Debug mode"
|
||||
|
@ -48,6 +48,7 @@ usage()
|
|||
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
|
||||
echo " -Q Enable dump memory, default off"
|
||||
echo " -D Enable dumping of function graph ir, default on"
|
||||
echo " -S Enable async data dump, default off"
|
||||
echo " -z Compile dataset & mindrecord, default on"
|
||||
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
|
||||
echo " -V Specify the minimum required cuda version, default CUDA 10.1"
|
||||
|
@ -56,6 +57,7 @@ usage()
|
|||
echo " -s Enable serving module, default off"
|
||||
echo " -B Enable debugger, default off"
|
||||
echo " -E Enable IBVERBS for parameter server, default off"
|
||||
echo " -l Compile with python dependency, default on"
|
||||
}
|
||||
|
||||
# check value of input is 'on' or 'off'
|
||||
|
@ -87,6 +89,7 @@ checkopts()
|
|||
ENABLE_TIMELINE="off"
|
||||
ENABLE_DUMP2PROTO="on"
|
||||
ENABLE_DUMPE2E="off"
|
||||
ENABLE_DATA_DUMP="off"
|
||||
ENABLE_DUMP_IR="on"
|
||||
COMPILE_MINDDATA="on"
|
||||
ENABLE_MPI="off"
|
||||
|
@ -98,9 +101,10 @@ checkopts()
|
|||
ENABLE_SERVING="off"
|
||||
ENABLE_DEBUGGER="off"
|
||||
ENABLE_IBVERBS="off"
|
||||
ENABLE_PYTHON="on"
|
||||
|
||||
# Process the options
|
||||
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
|
||||
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
|
||||
do
|
||||
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
|
||||
case "${opt}" in
|
||||
|
@ -151,6 +155,10 @@ checkopts()
|
|||
check_on_off $OPTARG p
|
||||
ENABLE_PROFILE="$OPTARG"
|
||||
;;
|
||||
l)
|
||||
check_on_off $OPTARG l
|
||||
ENABLE_PYTHON="$OPTARG"
|
||||
;;
|
||||
i)
|
||||
INC_BUILD="on"
|
||||
;;
|
||||
|
@ -212,6 +220,11 @@ checkopts()
|
|||
ENABLE_DUMPE2E="$OPTARG"
|
||||
echo "enable dump end to end"
|
||||
;;
|
||||
S)
|
||||
check_on_off $OPTARG S
|
||||
ENABLE_DATA_DUMP="$OPTARG"
|
||||
echo "enable data dump"
|
||||
;;
|
||||
D)
|
||||
check_on_off $OPTARG D
|
||||
ENABLE_DUMP_IR="$OPTARG"
|
||||
|
@ -315,7 +328,11 @@ build_mindspore()
|
|||
if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
|
||||
fi
|
||||
if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
|
||||
fi
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
|
||||
if [[ "X$ENABLE_MPI" = "Xon" ]]; then
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
|
||||
fi
|
||||
|
|
|
@ -9,11 +9,11 @@ else()
|
|||
LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
|
||||
URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
|
||||
MD5 0c2662a2b0bc80b0eb56495205247c8f
|
||||
CONFIGURE_COMMAND ./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=${CMAKE_SOURCE_DIR}/third_party/icu4c/filter.json
|
||||
CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
|
||||
)
|
||||
include_directories(${icu4c_INC})
|
||||
add_library(mindspore::icuuc ALIAS icu4c::${LIB_ICU_COMMON})
|
||||
add_library(mindspore::icudata ALIAS icu4c::${LIB_ICU_DATA})
|
||||
add_library(mindspore::icui18n ALIAS icu4c::${LIB_ICU_I18N})
|
||||
add_definitions(-D ENABLE_ICU4C)
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -15,7 +15,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
|
|||
include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
|
||||
|
||||
if (ENABLE_DEBUGGER)
|
||||
if (ENABLE_DEBUGGER OR ENABLE_SERVING)
|
||||
# build dependencies of gRPC
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
|
||||
|
@ -30,7 +30,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
|
|||
if(USE_GLOG)
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake)
|
||||
endif()
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND NOT ENABLE_GE)
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake)
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake)
|
||||
endif()
|
||||
|
|
|
@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
|
|||
option(ENABLE_AKG "enable akg" OFF)
|
||||
option(ENABLE_DEBUGGER "enable debugger" OFF)
|
||||
option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
|
||||
option(ENABLE_PYTHON "Enable python" ON)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
if (WIN32)
|
||||
|
@ -115,6 +116,10 @@ if(ENABLE_DUMP_E2E)
|
|||
add_compile_definitions(ENABLE_DUMP_E2E)
|
||||
endif()
|
||||
|
||||
if(ENABLE_DATA_DUMP)
|
||||
add_compile_definitions(ENABLE_DATA_DUMP)
|
||||
endif()
|
||||
|
||||
if(ENABLE_DEBUGGER)
|
||||
add_compile_definitions(ENABLE_DEBUGGER)
|
||||
endif()
|
||||
|
|
|
@ -213,7 +213,6 @@ install(
|
|||
${CMAKE_SOURCE_DIR}/mindspore/parallel
|
||||
${CMAKE_SOURCE_DIR}/mindspore/mindrecord
|
||||
${CMAKE_SOURCE_DIR}/mindspore/train
|
||||
${CMAKE_SOURCE_DIR}/mindspore/model_zoo
|
||||
${CMAKE_SOURCE_DIR}/mindspore/common
|
||||
${CMAKE_SOURCE_DIR}/mindspore/ops
|
||||
${CMAKE_SOURCE_DIR}/mindspore/communication
|
||||
|
@ -261,3 +260,17 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset)
|
|||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
|
||||
if (ENABLE_SERVING)
|
||||
install(
|
||||
TARGETS ms_serving
|
||||
DESTINATION ${INSTALL_BASE_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
install(
|
||||
TARGETS inference
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"DumpSettings": {
|
||||
"net_name": "ResNet50",
|
||||
"mode": 1,
|
||||
"iteration": 0,
|
||||
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
|
||||
},
|
||||
|
||||
"DumpSettingsSpec": {
|
||||
"net_name": "net name eg:ResNet50",
|
||||
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
|
||||
"iteration": "specified iteration",
|
||||
"kernels": "full scope names of the ops that need to be dumped"
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
Subproject commit 4084909d62c159da6ba316f61ad3d02a4857b34b
|
||||
Subproject commit 31aa96ef41067a0ecdc4113ef245f8ede48f3457
|
|
@ -20,7 +20,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "ir/dtype/type_id.h"
|
||||
#include "mindspore/core/ir/dtype/type_id.h"
|
||||
|
||||
namespace mindspore {
|
||||
#define MS_API __attribute__((visibility("default")))
|
||||
|
|
|
@ -334,7 +334,7 @@ class Parser:
|
|||
def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None:
|
||||
self.fn = fn
|
||||
self.parse_method = parse_method
|
||||
_, self.line_offset = inspect.getsourcelines(self.fn)
|
||||
self.line_offset = 0
|
||||
self.filename: str = inspect.getfile(self.fn)
|
||||
|
||||
# Used to resolve the function's globals Namespace.
|
||||
|
@ -350,7 +350,8 @@ class Parser:
|
|||
logger.debug("fn = %r", self.fn)
|
||||
tree = None
|
||||
if isinstance(self.fn, (types.FunctionType, types.MethodType)):
|
||||
original_src = inspect.getsource(self.fn)
|
||||
lines, self.line_offset = inspect.getsourcelines(self.fn)
|
||||
original_src = ''.join(lines)
|
||||
hexstr = hashlib.sha256(original_src.encode()).hexdigest()
|
||||
tree = Parser.ast_cache.get(hexstr)
|
||||
if not tree:
|
||||
|
|
|
@ -108,7 +108,8 @@ def enumerate_(x, start=0):
|
|||
"""Enumerate list or tuple."""
|
||||
x_type = F.typeof(x)
|
||||
ret = ()
|
||||
if check_is_tuple_or_list(x_type, "enumerate"):
|
||||
op_name = "enumerate"
|
||||
if check_is_tuple_or_list(x_type, op_name, "first input") and check_is_const_int(start, op_name, "start"):
|
||||
ret = zip(range(start, start + len(x)), x)
|
||||
return ret
|
||||
|
||||
|
@ -123,11 +124,22 @@ def while_cond(x):
|
|||
|
||||
|
||||
@constexpr
|
||||
def check_is_tuple_or_list(x, op_name):
|
||||
def check_is_tuple_or_list(x, op_name, arg_name):
|
||||
"""check whether x is list or tuple."""
|
||||
if isinstance(x, (mstype.list_type, mstype.tuple_type)):
|
||||
return True
|
||||
raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.")
|
||||
raise TypeError(f"For '{op_name}', the '{arg_name}' should be tuple or list, but got {x}.")
|
||||
|
||||
|
||||
@constexpr
def check_is_const_int(x, op_name, arg_name):
    """
    Validate that `x` is a constant integer.

    Args:
        x: The value to validate. `None` is reported as "not const".
        op_name (str): Operator name used in the error message.
        arg_name (str): Argument name used in the error message.

    Returns:
        bool, always True when the check passes.

    Raises:
        TypeError: If `x` is None or is not an instance of int.
    """
    if x is None:
        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got not const.")
    if isinstance(x, int):
        return True
    raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got {x}.")
|
||||
|
||||
|
||||
@constexpr
|
||||
def check_is_tensor_bool_cond(shp):
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
## common setting
|
||||
include_directories(${CMAKE_SOURCE_DIR}/mindspore/core)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
|
||||
|
@ -35,20 +36,20 @@ if(ENABLE_GPU)
|
|||
include_directories(${CUDNN_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS})
|
||||
|
||||
file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"device/gpu/*.cc"
|
||||
"device/gpu/*.cu"
|
||||
"kernel/gpu/*.cu"
|
||||
"kernel/akg/gpu/*.cc"
|
||||
"kernel/akg/akg_kernel_build.cc"
|
||||
"kernel/akg/akg_kernel_attrs_process.cc"
|
||||
"runtime/device/gpu/*.cc"
|
||||
"runtime/device/gpu/*.cu"
|
||||
"backend/kernel_compiler/gpu/*.cu"
|
||||
"backend/kernel_compiler/akg/gpu/*.cc"
|
||||
"backend/kernel_compiler/akg/akg_kernel_build.cc"
|
||||
"backend/kernel_compiler/akg/akg_kernel_attrs_process.cc"
|
||||
)
|
||||
|
||||
list(APPEND CUDA_NVCC_FLAGS -arch=sm_53)
|
||||
list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc")
|
||||
list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc"
|
||||
"device/gpu/distribution/collective_wrapper.cc"
|
||||
"device/gpu/distribution/mpi_wrapper.cc"
|
||||
"device/gpu/distribution/nccl_wrapper.cc"
|
||||
list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc")
|
||||
list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc"
|
||||
"runtime/device/gpu/distribution/collective_wrapper.cc"
|
||||
"runtime/device/gpu/distribution/mpi_wrapper.cc"
|
||||
"runtime/device/gpu/distribution/nccl_wrapper.cc"
|
||||
)
|
||||
|
||||
set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
|
@ -56,6 +57,7 @@ if(ENABLE_GPU)
|
|||
set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
|
||||
cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
|
||||
set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
|
||||
add_compile_definitions(ENABLE_GPU)
|
||||
endif ()
|
||||
|
||||
## make flatbuffer files
|
||||
|
@ -101,16 +103,20 @@ if (ENABLE_DUMP_PROTO)
|
|||
endif ()
|
||||
|
||||
if (ENABLE_D)
|
||||
include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu")
|
||||
include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
|
||||
include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
|
||||
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto")
|
||||
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
|
||||
ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
|
||||
|
||||
file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
|
||||
ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
|
||||
|
||||
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto")
|
||||
# Generate sources for the Ascend dump protos. The second output variable gets its own
# name (mirroring PREDICT_PROTOSRCS/PREDICT_PROTOHDRS) so that PROTOHDRS, filled by the
# aicpu proto generation, is not overwritten. NOTE(review): assumes nothing later relies
# on PROTOHDRS holding the dump headers - confirm.
ms_protobuf_generate(DUMP_PROTOSRCS DUMP_PROTOHDRS ${PROTO_DUMP})
|
||||
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
|
||||
|
||||
add_compile_definitions(ENABLE_D)
|
||||
endif ()
|
||||
|
@ -121,18 +127,36 @@ if (MINDSPORE_PROTO_LIST)
|
|||
endif()
|
||||
|
||||
## make sub objects
|
||||
set(SUB_COMP
|
||||
transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict
|
||||
pybind_api pynative session utils vm
|
||||
set(SUB_COMP
|
||||
transform/graph_ir
|
||||
transform/onnx
|
||||
backend/optimizer
|
||||
backend/kernel_compiler
|
||||
backend/session
|
||||
runtime/device
|
||||
frontend/optimizer
|
||||
frontend/parallel
|
||||
frontend/operator
|
||||
pipeline/jit
|
||||
pipeline/pynative
|
||||
common debug gvar predict pybind_api utils vm
|
||||
)
|
||||
|
||||
foreach (_comp ${SUB_COMP})
|
||||
add_subdirectory(${_comp})
|
||||
if (TARGET _mindspore_${_comp}_obj)
|
||||
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${_comp}_obj>)
|
||||
add_dependencies(_mindspore_${_comp}_obj proto_input flat_input)
|
||||
string(REPLACE "/" "_" sub ${_comp})
|
||||
if (TARGET _mindspore_${sub}_obj)
|
||||
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
|
||||
add_dependencies(_mindspore_${sub}_obj proto_input flat_input)
|
||||
endif ()
|
||||
endforeach ()
|
||||
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base)
|
||||
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_base_obj>)
|
||||
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/abstract abstract)
|
||||
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_abstract_obj>)
|
||||
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir)
|
||||
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>)
|
||||
add_dependencies(_mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input)
|
||||
|
||||
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
|
||||
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
|
||||
|
@ -204,8 +228,8 @@ endif()
|
|||
|
||||
# set c_expression building
|
||||
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
|
||||
set_property(SOURCE "pipeline/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
|
||||
pybind11_add_module(_c_expression "pipeline/init.cc")
|
||||
set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
|
||||
pybind11_add_module(_c_expression "pipeline/jit/init.cc")
|
||||
|
||||
MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
|
@ -231,9 +255,11 @@ else ()
|
|||
target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
|
||||
target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
|
||||
target_link_libraries(_c_expression PRIVATE mindspore_gvar)
|
||||
target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
|
||||
if (${ENABLE_IBVERBS} STREQUAL "ON")
|
||||
target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
|
||||
if (NOT ENABLE_GE)
|
||||
target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
|
||||
# Link the RDMA libraries only when IBVERBS support is enabled.
# Testing the variable name directly accepts every CMake true constant (ON, TRUE, 1, ...)
# and avoids a malformed if() when the variable expands to an empty string.
if (ENABLE_IBVERBS)
    target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
endif()
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
|
@ -260,8 +286,8 @@ if (ENABLE_CPU)
|
|||
endif ()
|
||||
|
||||
if (ENABLE_MINDDATA)
|
||||
add_subdirectory(mindrecord)
|
||||
add_subdirectory(dataset)
|
||||
add_subdirectory(minddata/mindrecord)
|
||||
add_subdirectory(minddata/dataset)
|
||||
endif ()
|
||||
|
||||
# build inference
|
||||
|
@ -270,7 +296,7 @@ set(LOAD_ONNX_SRC
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc
|
||||
)
|
||||
add_library(inference SHARED
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc
|
||||
${LOAD_ONNX_SRC}
|
||||
)
|
||||
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
|
||||
|
|
|
@ -25,7 +25,15 @@ if (ENABLE_CPU)
|
|||
file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"cpu/*.cc"
|
||||
)
|
||||
|
||||
|
||||
list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc"
|
||||
"cpu/ps/pull_kernel.cc"
|
||||
"cpu/ps/embedding_look_up_ps_kernel.cc"
|
||||
"cpu/ps/embedding_look_up_proxy_kernel.cc"
|
||||
"cpu/ps/apply_momentum_ps_kernel.cc"
|
||||
"cpu/ps/sparse_apply_adam_ps_kernel.cc"
|
||||
"cpu/ps/sparse_apply_ftrl_ps_kernel.cc")
|
||||
|
||||
if (NOT ENABLE_MPI)
|
||||
list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
|
||||
list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
|
||||
|
@ -55,4 +63,4 @@ endif()
|
|||
|
||||
set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}
|
||||
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL)
|
||||
add_library(_mindspore_kernel_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})
|
||||
add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})
|
|
@ -0,0 +1,312 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
|
||||
#include <google/protobuf/text_format.h>
|
||||
#include <fstream>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
|
||||
#include "proto/tensor.pb.h"
|
||||
#include "proto/tensor_shape.pb.h"
|
||||
#include "proto/attr.pb.h"
|
||||
#include "proto/node_def.pb.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "common/utils.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>;
|
||||
|
||||
bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num,
|
||||
std::vector<size_t> *input_size_list) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(input_size_list);
|
||||
for (size_t i = 0; i < input_num; i++) {
|
||||
std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
|
||||
if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {
|
||||
if (!anf_node->isa<CNode>()) {
|
||||
MS_LOG(EXCEPTION) << "anf_node is not CNode.";
|
||||
}
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (cnode->inputs().size() < (i + 1)) {
|
||||
MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1;
|
||||
return false;
|
||||
}
|
||||
auto input_node = cnode->inputs()[i + 1];
|
||||
MS_EXCEPTION_IF_NULL(input_node);
|
||||
if (input_node->isa<ValueNode>()) {
|
||||
auto value_ptr = GetValueNode(input_node);
|
||||
auto value = GetValue<std::string>(value_ptr);
|
||||
input_size_list->push_back(value.size());
|
||||
}
|
||||
} else {
|
||||
auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
|
||||
MS_EXCEPTION_IF_NULL(type_ptr);
|
||||
int64_t size_i = 1;
|
||||
for (size_t j = 0; j < shape_i.size(); j++) {
|
||||
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
|
||||
}
|
||||
size_t type_byte = GetTypeByte(type_ptr);
|
||||
if (type_byte == 0) {
|
||||
return false;
|
||||
}
|
||||
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
|
||||
input_size_list->push_back(LongToSize(size_i));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
|
||||
std::vector<size_t> input_size_list;
|
||||
std::vector<size_t> output_size_list;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
|
||||
|
||||
if (!SetIOIputSize(anf_node, input_num, &input_size_list)) {
|
||||
return false;
|
||||
}
|
||||
kernel_mod_ptr->SetInputSizeList(input_size_list);
|
||||
|
||||
for (size_t i = 0; i < output_num; i++) {
|
||||
std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
|
||||
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
|
||||
MS_EXCEPTION_IF_NULL(type_ptr);
|
||||
int64_t size_i = 1;
|
||||
for (size_t j = 0; j < shape_i.size(); j++) {
|
||||
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
|
||||
}
|
||||
size_t type_byte = GetTypeByte(type_ptr);
|
||||
if (type_byte == 0) {
|
||||
return false;
|
||||
}
|
||||
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
|
||||
output_size_list.push_back(LongToSize(size_i));
|
||||
}
|
||||
kernel_mod_ptr->SetOutputSizeList(output_size_list);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Converts `value` into a mindspore::AttrValue according to the declared attribute `type` and
// stores it in `node_attr` under `attr_name`.
//
// Supported `type` strings: "int", "str", "bool", "float" and "listInt". For "listInt", a
// value whose type prints as "Int32" is treated as a one-element list; anything else is read
// as std::vector<int>. Any other `type` is reported through MS_LOG(EXCEPTION).
void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value,
                    ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) {
  MS_EXCEPTION_IF_NULL(node_attr);
  MS_EXCEPTION_IF_NULL(value);
  if (type == "int") {
    auto attr_value = GetValue<int>(value);
    (*node_attr)[attr_name].set_i(attr_value);
  } else if (type == "str") {
    auto attr_value = GetValue<std::string>(value);
    (*node_attr)[attr_name].set_s(attr_value);
  } else if (type == "bool") {
    auto attr_value = GetValue<bool>(value);
    (*node_attr)[attr_name].set_b(attr_value);
  } else if (type == "float") {
    auto attr_value = GetValue<float>(value);
    (*node_attr)[attr_name].set_f(attr_value);
  } else if (type == "listInt") {
    std::vector<int> attr_value;
    auto value_type = value->type();
    MS_EXCEPTION_IF_NULL(value_type);
    auto value_type_str = value_type->ToString();
    if (value_type_str == "Int32") {
      // A scalar int is accepted where a list is declared and wrapped into a single-element list.
      int data = GetValue<int>(value);
      attr_value.push_back(data);
    } else {
      attr_value = GetValue<std::vector<int>>(value);
    }
    mindspore::AttrValue input_shape_attr;
    mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();
    MS_EXCEPTION_IF_NULL(input_shape_attr_list);
    for (const auto shape : attr_value) {
      input_shape_attr_list->add_i(shape);
    }
    (*node_attr)[attr_name] = input_shape_attr;
  } else {
    // A separating space was missing here, which glued the type name to "not support".
    MS_LOG(EXCEPTION) << "type: " << type << " not support";
  }
}
|
||||
|
||||
void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(proto);
|
||||
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
|
||||
if (op_name == kInitDataSetQueue) {
|
||||
op_name = kInitData;
|
||||
}
|
||||
if (op_name == kPrint) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
|
||||
MS_EXCEPTION_IF_NULL(op_info_ptr);
|
||||
auto attrs_ptr = op_info_ptr->attrs_ptr();
|
||||
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(primitive);
|
||||
::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs();
|
||||
for (const auto &attr_ptr : attrs_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(attr_ptr);
|
||||
std::string attr_name = attr_ptr->name();
|
||||
auto value = primitive->GetAttr(attr_name);
|
||||
if (value != nullptr) {
|
||||
if (attr_name == kQueueName || attr_name == kSharedName) {
|
||||
attr_name = kChannelName;
|
||||
} else if (attr_name == kSeed0) {
|
||||
attr_name = kSeed;
|
||||
} else if (attr_name == kSeed1) {
|
||||
attr_name = kSeed2;
|
||||
}
|
||||
std::string type = attr_ptr->type();
|
||||
ParseAttrValue(type, attr_name, value, node_attr);
|
||||
}
|
||||
}
|
||||
MS_LOG(INFO) << "Set node attr end!";
|
||||
}
|
||||
|
||||
void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
|
||||
MS_EXCEPTION_IF_NULL(proto);
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
|
||||
if (input_num == 0) {
|
||||
MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input.";
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t input_index = 0; input_index < input_num; input_index++) {
|
||||
::mindspore::Tensor *node_inputs = proto->add_inputs();
|
||||
MS_EXCEPTION_IF_NULL(node_inputs);
|
||||
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
|
||||
std::vector<size_t> input_shape;
|
||||
int32_t input_data_type;
|
||||
if (input_type == kObjectTypeString) {
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto input_node = cnode->inputs()[input_index + 1];
|
||||
auto value_ptr = GetValueNode(input_node);
|
||||
auto value = GetValue<std::string>(value_ptr);
|
||||
input_shape.push_back(1);
|
||||
input_shape.push_back(value.size());
|
||||
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
|
||||
} else {
|
||||
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
|
||||
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
|
||||
}
|
||||
|
||||
mindspore::TensorShape *tensorShape = node_inputs->mutable_tensor_shape();
|
||||
for (auto item : input_shape) {
|
||||
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
|
||||
dim->set_size((::google::protobuf::int64)item);
|
||||
}
|
||||
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
|
||||
node_inputs->set_mem_device("HBM");
|
||||
}
|
||||
}
|
||||
|
||||
void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
|
||||
MS_EXCEPTION_IF_NULL(proto);
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
|
||||
if (output_num == 0) {
|
||||
MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. ";
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t output_index = 0; output_index < output_num; output_index++) {
|
||||
::mindspore::Tensor *node_outputs = proto->add_outputs();
|
||||
MS_EXCEPTION_IF_NULL(node_outputs);
|
||||
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
|
||||
mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
|
||||
MS_EXCEPTION_IF_NULL(tensorShape);
|
||||
for (auto item : output_shape) {
|
||||
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
|
||||
MS_EXCEPTION_IF_NULL(dim);
|
||||
dim->set_size((::google::protobuf::int64)item);
|
||||
}
|
||||
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
|
||||
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
|
||||
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
|
||||
node_outputs->set_mem_device("HBM");
|
||||
}
|
||||
}
|
||||
|
||||
void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(proto);
|
||||
MS_LOG(INFO) << "SetNodedefProto entry";
|
||||
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
|
||||
if (op_name == kInitDataSetQueue) {
|
||||
op_name = kInitData;
|
||||
}
|
||||
// set op name
|
||||
proto->set_op(op_name);
|
||||
// set inputs tensor
|
||||
SetNodeInputs(anf_node, proto);
|
||||
// set outputs tensor
|
||||
SetNodeOutputs(anf_node, proto);
|
||||
// set node attr
|
||||
SetNodeAttr(anf_node, proto);
|
||||
MS_LOG(INFO) << "SetNodedefProto end!";
|
||||
}
|
||||
|
||||
bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
|
||||
const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_LOG(INFO) << "CreateNodeDefBytes entry";
|
||||
|
||||
mindspore::NodeDef proto;
|
||||
SetNodedefProto(anf_node, &proto);
|
||||
std::string nodeDefStr;
|
||||
if (!proto.SerializeToString(&nodeDefStr)) {
|
||||
MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
|
||||
return false;
|
||||
}
|
||||
kernel_mod_ptr->SetNodeDef(nodeDefStr);
|
||||
MS_LOG(INFO) << "CreateNodeDefBytes end!";
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates the AICPU kernel mod for `anf_node`.
// The kernel mod records the node and its op name (kInitDataSetQueue is registered as
// kInitData), receives the serialized NodeDef and the input/output size lists.
// Raises through MS_LOG(EXCEPTION) when the NodeDef cannot be created or the size lists
// cannot be set; otherwise returns the fully initialised kernel mod.
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  auto kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>();
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  kernel_mod_ptr->SetAnfNode(anf_node);
  kernel_mod_ptr->SetNodeName(op_name);
  if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
    // Message spelling corrected ("faild" -> "failed").
    MS_LOG(EXCEPTION) << "Create nodeDefBytes failed!";
  }
  if (!SetIOSize(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Set input output size list failed.";
  }
  return kernel_mod_ptr;
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -16,7 +16,7 @@
|
|||
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
|
||||
#include <memory>
|
||||
#include "kernel/kernel.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,13 +14,13 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/aicpu/aicpu_kernel_metadata.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "kernel/oplib/oplib.h"
|
||||
#include "kernel/common_utils.h"
|
||||
#include "kernel/aicpu/aicpu_util.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/oplib/oplib.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -20,7 +20,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/kernel_build_info.h"
|
||||
#include "backend/kernel_compiler/kernel_build_info.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/aicpu/aicpu_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
@ -23,9 +23,10 @@
|
|||
|
||||
#include "runtime/mem.h"
|
||||
#include "runtime/rt.h"
|
||||
#include "kernel/aicpu/aicpu_kernel_build.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "kernel/aicpu/aicpu_util.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
||||
using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
|
||||
|
||||
|
@ -144,8 +145,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
|
|||
if (node_name_ == kTopK) {
|
||||
node_name_ = kTopKV2;
|
||||
}
|
||||
|
||||
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
|
||||
stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs);
|
||||
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
|
||||
|
||||
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
|
||||
return {task_info_ptr};
|
|
@ -18,8 +18,8 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "kernel/ascend_kernel_mod.h"
|
||||
#include "kernel/aicpu/aicpu_util.h"
|
||||
#include "backend/kernel_compiler/ascend_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class AicpuOpKernelMod : public AscendKernelMod {
|
|
@ -13,14 +13,14 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/aicpu/aicpu_util.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "proto/types.pb.h"
|
||||
#include "runtime/mem.h"
|
||||
#include "runtime/rt.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -0,0 +1,64 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
|
||||
constexpr auto kInitData = "InitData";
|
||||
constexpr auto kGetNext = "GetNext";
|
||||
constexpr auto kPrint = "Print";
|
||||
constexpr auto kPack = "Pack";
|
||||
constexpr auto kOutputTypes = "output_types";
|
||||
constexpr auto kOutputShapes = "output_shapes";
|
||||
constexpr auto kChannelName = "channel_name";
|
||||
constexpr auto kSharedName = "shared_name";
|
||||
constexpr auto kShapes = "shapes";
|
||||
constexpr auto kTypes = "types";
|
||||
constexpr auto kQueueName = "queue_name";
|
||||
constexpr auto kSeed = "seed";
|
||||
constexpr auto kSeed0 = "Seed0";
|
||||
constexpr auto kSeed1 = "Seed1";
|
||||
constexpr auto kSeed2 = "seed2";
|
||||
constexpr auto kTopK = "TopK";
|
||||
constexpr auto kTopKV2 = "TopKV2";
|
||||
|
||||
// Parameter header used by the AICPU kernels. The struct is declared packed, so its
// members are stored back to back without padding.
// NOTE(review): presumably this header is placed in front of the io addresses and the
// custom message it counts in `length` - confirm against the code that fills it.
struct AicpuParamHead {
|
||||
uint32_t length; // Total length, including the custom message
|
||||
uint32_t ioAddrNum; // Number of input and output addresses
|
||||
uint32_t extInfoLength; // Length of the extInfo struct
|
||||
uint64_t extInfoAddr; // Address of the extInfo struct
|
||||
} __attribute__((packed));
|
||||
|
||||
// Static helpers shared by the AICPU kernel build code.
class AicpuOpUtil {
|
||||
public:
|
||||
// Converts a MindSpore TypeId into the integer value of the corresponding proto data
// type; callers cast the result to mindspore::DataType.
// NOTE(review): the value returned for an unsupported TypeId is defined in the .cc file,
// which is not visible here.
static int MsTypeToProtoType(TypeId ms_type);
|
||||
|
||||
private:
|
||||
// kernel id
|
||||
static uint64_t KernelId_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
|
|
@ -13,11 +13,11 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/akg/akg_kernel_attrs_process.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "pre_activate/common/helper.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/optimizer/common/helper.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -22,7 +22,7 @@
|
|||
#include <unordered_map>
|
||||
#include "ir/anf.h"
|
||||
#include "utils/utils.h"
|
||||
#include "operator/ops.h"
|
||||
#include "frontend/operator/ops.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/akg/akg_kernel_build.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
|
||||
#include <Python.h>
|
||||
#include <sys/types.h>
|
||||
#include <signal.h>
|
||||
|
@ -35,8 +35,8 @@
|
|||
#include "utils/convert_utils.h"
|
||||
#include "utils/any.h"
|
||||
#include "utils/utils.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "kernel/akg/akg_kernel_attrs_process.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -22,11 +22,11 @@
|
|||
#include <memory>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include "kernel/kernel.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "ir/dtype.h"
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "kernel/common_utils.h"
|
||||
#include "kernel/oplib/oplib.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
#include "backend/kernel_compiler/oplib/oplib.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,11 +14,11 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/akg/akg_kernel_metadata.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_metadata.h"
|
||||
#include <memory>
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "kernel/oplib/oplib.h"
|
||||
#include "kernel/common_utils.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/oplib/oplib.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -21,7 +21,7 @@
|
|||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include "kernel/kernel_build_info.h"
|
||||
#include "backend/kernel_compiler/kernel_build_info.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/akg/ascend/akg_ascend_kernel_build.h"
|
||||
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
@ -26,12 +26,12 @@
|
|||
#include <Python.h>
|
||||
#include "ir/dtype.h"
|
||||
#include "ir/func_graph.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "kernel/common_utils.h"
|
||||
#include "kernel/tbe/tbe_utils.h"
|
||||
#include "kernel/akg/ascend/akg_ascend_kernel_mod.h"
|
||||
#include "kernel/akg/akg_kernel_attrs_process.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -22,8 +22,8 @@
|
|||
#include <vector>
|
||||
#include <map>
|
||||
#include "ir/anf.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "kernel/akg/akg_kernel_build.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/akg/ascend/akg_ascend_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h"
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
|
@ -26,6 +26,7 @@
|
|||
#include "runtime/rt.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -123,8 +124,8 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in
|
|||
MS_LOG(DEBUG) << "The block_dim is:" << block_dim;
|
||||
|
||||
TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
|
||||
stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs,
|
||||
output_data_addrs, workspace_addrs);
|
||||
kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data,
|
||||
input_data_addrs, output_data_addrs, workspace_addrs, NeedDump());
|
||||
return {task_info_ptr};
|
||||
}
|
||||
} // namespace kernel
|
|
@ -19,8 +19,8 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/ascend_kernel_mod.h"
|
||||
#include "kernel/tbe/tbe_utils.h"
|
||||
#include "backend/kernel_compiler/ascend_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,12 +14,12 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/akg/gpu/akg_gpu_kernel_build.h"
|
||||
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/kernel.h"
|
||||
#include "kernel/akg/akg_kernel_build.h"
|
||||
#include "kernel/akg/gpu/akg_gpu_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
|
||||
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
|
@ -16,8 +16,8 @@
|
|||
|
||||
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
|
||||
#define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
|
||||
#include "kernel/kernel.h"
|
||||
#include "ir/base.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "base/base.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/akg/gpu/akg_gpu_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include "nlohmann/json.hpp"
|
|
@ -21,7 +21,7 @@
|
|||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include "kernel/kernel.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -20,7 +20,10 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include "framework/ge_runtime/task_info.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
#include "debug/data_dump_parser.h"
|
||||
#endif
|
||||
|
||||
using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
|
||||
namespace mindspore {
|
||||
|
@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod {
|
|||
const std::vector<AddressPtr> &, uint32_t) = 0;
|
||||
uint32_t block_dim() { return block_dim_; }
|
||||
uint32_t stream_id() { return stream_id_; }
|
||||
// Tells whether this kernel should be dumped.
// With ENABLE_DATA_DUMP defined the decision is delegated to DataDumpParser, keyed by
// kernel_name_; without it the answer is always false. Virtual, so subclasses may override.
virtual bool NeedDump() {
|
||||
#ifdef ENABLE_DATA_DUMP
|
||||
return DataDumpParser::GetInstance().NeedDump(kernel_name_);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
uint32_t block_dim_{1};
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/common_utils.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
|
@ -22,16 +22,18 @@
|
|||
#include <fstream>
|
||||
#include <thread>
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "common/utils.h"
|
||||
#include "ir/manager.h"
|
||||
#include "ir/meta_tensor.h"
|
||||
#include "ir/func_graph.h"
|
||||
#include "operator/ops.h"
|
||||
#include "frontend/operator/ops.h"
|
||||
#include "utils/graph_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
constexpr char kAxis[] = "axis";
|
||||
constexpr char kTypeInt32[] = "Int32";
|
||||
const std::unordered_map<std::string, TypeId> type_id_maps = {
|
||||
{"float", TypeId::kNumberTypeFloat32}, {"float16", TypeId::kNumberTypeFloat16},
|
||||
{"float32", TypeId::kNumberTypeFloat32}, {"float64", TypeId::kNumberTypeFloat64},
|
||||
|
@ -579,8 +581,40 @@ void WorkerForReduceSparseGradient(WorkerParamsForReduceSparseGradient param) {
|
|||
}
|
||||
}
|
||||
|
||||
void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad,
|
||||
size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices,
|
||||
std::vector<size_t> *slice_positions) {
|
||||
MS_LOG(DEBUG) << "Start";
|
||||
size_t thread_num = 24;
|
||||
if (slice_positions->size() < thread_num) {
|
||||
thread_num = slice_positions->size();
|
||||
}
|
||||
size_t stride = (slice_positions->size() + thread_num - 1) / thread_num;
|
||||
thread_num = (slice_positions->size() + stride - 1) / stride;
|
||||
std::vector<std::thread> threads;
|
||||
size_t max_length = sorted_indices->size() * outer_dim;
|
||||
for (size_t i = 0; i < thread_num; ++i) {
|
||||
size_t slice_start = i * stride;
|
||||
size_t slice_end = 0;
|
||||
if (i == thread_num - 1) {
|
||||
slice_end = slice_positions->size();
|
||||
} else {
|
||||
slice_end = slice_start + stride;
|
||||
}
|
||||
WorkerParamsForReduceSparseGradient params{
|
||||
slice_start, slice_end, max_length, outer_dim, sorted_indices, slice_positions, origin_sparse_grad.value_,
|
||||
unique_grad};
|
||||
threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params));
|
||||
}
|
||||
for (size_t i = 0; i < thread_num; ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
MS_LOG(DEBUG) << "End";
|
||||
}
|
||||
|
||||
void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
|
||||
size_t outer_dim) {
|
||||
size_t outer_dim, bool use_multi_threads) {
|
||||
MS_LOG(DEBUG) << "Start";
|
||||
MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
|
||||
MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
|
||||
MS_EXCEPTION_IF_NULL(unique_grad);
|
||||
|
@ -599,37 +633,102 @@ void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradie
|
|||
[](const std::pair<int, size_t> &left, const std::pair<int, size_t> &right) { return left.first < right.first; });
|
||||
int last_index = 0;
|
||||
std::vector<size_t> slice_positions;
|
||||
slice_positions.reserve(sorted_indices.size());
|
||||
for (size_t i = 0; i < sorted_indices.size(); ++i) {
|
||||
if (i == 0 || last_index != sorted_indices[i].first) {
|
||||
slice_positions.emplace_back(i);
|
||||
}
|
||||
last_index = sorted_indices[i].first;
|
||||
}
|
||||
size_t thread_num = 8;
|
||||
if (slice_positions.size() < thread_num) {
|
||||
thread_num = slice_positions.size();
|
||||
if (use_multi_threads) {
|
||||
RunMultiThreadReduceSparseGradient(origin_sparse_grad, unique_grad, outer_dim, &sorted_indices, &slice_positions);
|
||||
} else {
|
||||
size_t max_length = sorted_indices.size() * outer_dim;
|
||||
WorkerParamsForReduceSparseGradient params{0,
|
||||
slice_positions.size(),
|
||||
max_length,
|
||||
outer_dim,
|
||||
&sorted_indices,
|
||||
&slice_positions,
|
||||
origin_sparse_grad.value_,
|
||||
unique_grad};
|
||||
WorkerForReduceSparseGradient(params);
|
||||
}
|
||||
size_t stride = (slice_positions.size() + thread_num - 1) / thread_num;
|
||||
thread_num = (slice_positions.size() + stride - 1) / stride;
|
||||
std::vector<std::thread> threads;
|
||||
size_t max_length = sorted_indices.size() * outer_dim;
|
||||
for (size_t i = 0; i < thread_num; ++i) {
|
||||
size_t slice_start = i * stride;
|
||||
size_t slice_end = 0;
|
||||
if (i == thread_num - 1) {
|
||||
slice_end = slice_positions.size();
|
||||
} else {
|
||||
slice_end = slice_start + stride;
|
||||
unique_grad->indices_size_ = slice_positions.size();
|
||||
MS_LOG(DEBUG) << "End";
|
||||
}
|
||||
|
||||
void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads,
|
||||
SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim,
|
||||
size_t outer_dim) {
|
||||
MS_LOG(DEBUG) << "Start";
|
||||
if (unique_slice_grads.empty()) {
|
||||
return;
|
||||
}
|
||||
size_t index_data_size = outer_dim * sizeof(float);
|
||||
size_t unique_indices_size = 0;
|
||||
for (size_t i = 0; i < unique_slice_grads.size(); ++i) {
|
||||
auto &slice_grad = unique_slice_grads[i];
|
||||
auto ret_code = memcpy_s(tmp_grad->value_ + unique_indices_size * outer_dim,
|
||||
(tmp_grad->indices_size_ - unique_indices_size) * index_data_size, slice_grad->value_,
|
||||
slice_grad->indices_size_ * index_data_size);
|
||||
if (ret_code != EOK) {
|
||||
MS_LOG(EXCEPTION) << "Failed to copy data!";
|
||||
}
|
||||
WorkerParamsForReduceSparseGradient params{
|
||||
slice_start, slice_end, max_length, outer_dim, &sorted_indices, &slice_positions, origin_sparse_grad.value_,
|
||||
unique_grad};
|
||||
threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params));
|
||||
ret_code =
|
||||
memcpy_s(tmp_grad->indices_ + unique_indices_size, (tmp_grad->indices_size_ - unique_indices_size) * sizeof(int),
|
||||
slice_grad->indices_, slice_grad->indices_size_ * sizeof(int));
|
||||
if (ret_code != EOK) {
|
||||
MS_LOG(EXCEPTION) << "Failed to copy data!";
|
||||
}
|
||||
unique_indices_size += slice_grad->indices_size_;
|
||||
}
|
||||
tmp_grad->indices_size_ = unique_indices_size;
|
||||
ReduceSparseGradient(*tmp_grad, unique_grad, first_dim, outer_dim);
|
||||
MS_LOG(DEBUG) << "End";
|
||||
}
|
||||
|
||||
void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad,
|
||||
SparseGradient *unique_grad, size_t first_dim, size_t outer_dim) {
|
||||
MS_LOG(DEBUG) << "Start";
|
||||
MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
|
||||
MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
|
||||
MS_EXCEPTION_IF_NULL(unique_grad);
|
||||
MS_EXCEPTION_IF_NULL(unique_grad->value_);
|
||||
MS_EXCEPTION_IF_NULL(unique_grad->indices_);
|
||||
MS_EXCEPTION_IF_NULL(tmp_grad);
|
||||
MS_EXCEPTION_IF_NULL(tmp_grad->value_);
|
||||
MS_EXCEPTION_IF_NULL(tmp_grad->indices_);
|
||||
size_t thread_num = 24;
|
||||
if (origin_sparse_grad.indices_size_ < thread_num) {
|
||||
thread_num = origin_sparse_grad.indices_size_;
|
||||
}
|
||||
size_t thread_indices_size = origin_sparse_grad.indices_size_ / thread_num;
|
||||
size_t left_indices_size = origin_sparse_grad.indices_size_ % thread_num;
|
||||
std::vector<std::thread> threads;
|
||||
threads.reserve(thread_num);
|
||||
std::vector<std::shared_ptr<SparseGradient>> unique_slice_grads;
|
||||
for (size_t i = 0; i < thread_num; ++i) {
|
||||
size_t indices_size = thread_indices_size;
|
||||
if (i == thread_num - 1) {
|
||||
indices_size = thread_indices_size + left_indices_size;
|
||||
}
|
||||
size_t value_offset = i * thread_indices_size * outer_dim;
|
||||
size_t indices_offset = i * thread_indices_size;
|
||||
auto slice_grad = SparseGradient(
|
||||
{origin_sparse_grad.value_ + value_offset, origin_sparse_grad.indices_ + indices_offset, indices_size});
|
||||
unique_slice_grads.emplace_back(std::make_shared<SparseGradient>());
|
||||
unique_slice_grads[i]->value_ = unique_grad->value_ + value_offset;
|
||||
unique_slice_grads[i]->indices_ = unique_grad->indices_ + indices_offset;
|
||||
unique_slice_grads[i]->indices_size_ = indices_size;
|
||||
threads.emplace_back(
|
||||
std::thread(ReduceSparseGradient, slice_grad, unique_slice_grads[i].get(), first_dim, outer_dim, false));
|
||||
}
|
||||
for (size_t i = 0; i < thread_num; ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
unique_grad->indices_size_ = slice_positions.size();
|
||||
ReduceMultiSparseGradient(unique_slice_grads, tmp_grad, unique_grad, first_dim, outer_dim);
|
||||
MS_LOG(DEBUG) << "End";
|
||||
}
|
||||
|
||||
std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index) {
|
||||
|
@ -892,5 +991,39 @@ void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputePa
|
|||
threads[i].join();
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode) {
|
||||
if (AnfAlgo::GetInputTensorNum(cnode) != AnfAlgo::GetOutputTensorNum(cnode) &&
|
||||
AnfAlgo::GetInputTensorNum(cnode) != 1) {
|
||||
MS_LOG(EXCEPTION) << "the kind of reduce node [" << cnode->DebugString()
|
||||
<< "] is not single input or single output ";
|
||||
}
|
||||
std::vector<int> axis;
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0);
|
||||
auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
|
||||
MS_EXCEPTION_IF_NULL(primitive);
|
||||
auto axis_attr = primitive->GetAttr(kAxis);
|
||||
if (axis_attr == nullptr) {
|
||||
MS_LOG(ERROR) << "This node does't have axie attr.";
|
||||
return std::vector<int>();
|
||||
}
|
||||
auto type = axis_attr->type();
|
||||
MS_EXCEPTION_IF_NULL(type);
|
||||
std::vector<int> axis_list;
|
||||
if (type->ToString() == kTypeInt32) {
|
||||
axis_list.emplace_back(GetValue<int>(axis_attr));
|
||||
} else {
|
||||
axis_list = GetValue<std::vector<int>>(axis_attr);
|
||||
}
|
||||
for (const auto &elem : axis_list) {
|
||||
if (elem < 0) {
|
||||
axis.emplace_back(input_shape.size() + elem);
|
||||
} else {
|
||||
axis.emplace_back(elem);
|
||||
}
|
||||
}
|
||||
AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(axis), cnode);
|
||||
return axis;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -26,9 +26,9 @@
|
|||
#include <vector>
|
||||
#include <utility>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "kernel/kernel.h"
|
||||
#include "kernel/oplib/opinfo.h"
|
||||
#include "kernel/kernel_build_info.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "backend/kernel_compiler/oplib/opinfo.h"
|
||||
#include "backend/kernel_compiler/kernel_build_info.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -115,7 +115,7 @@ int Sign(float x);
|
|||
void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
|
||||
size_t outer_dim);
|
||||
void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
|
||||
size_t outer_dim);
|
||||
size_t outer_dim, bool use_multi_threads = true);
|
||||
std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index);
|
||||
std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list,
|
||||
const std::vector<AnfNodePtr> &input_list);
|
||||
|
@ -130,6 +130,15 @@ void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<An
|
|||
bool IsWeightBoundary(const AnfNodePtr &node);
|
||||
void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params,
|
||||
size_t total_compute_size);
|
||||
void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad,
|
||||
size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices,
|
||||
std::vector<size_t> *slice_positions);
|
||||
void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads,
|
||||
SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim,
|
||||
size_t outer_dim);
|
||||
void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad,
|
||||
SparseGradient *unique_grad, size_t first_dim, size_t outer_dim);
|
||||
std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
@ -14,9 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/cpu/addn_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "ir/primitive.h"
|
||||
#include "backend/kernel_compiler/cpu/addn_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -18,8 +18,8 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,10 +13,9 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/allgather_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "device/cpu/mpi/mpi_adapter.h"
|
||||
#include "ir/primitive.h"
|
||||
#include "backend/kernel_compiler/cpu/allgather_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "runtime/device/cpu/mpi/mpi_adapter.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
|
@ -17,8 +17,8 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,9 +13,9 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/apply_momentum_cpu_kernel.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,8 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/argmax_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -17,8 +17,8 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/cpu/bias_add_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -18,8 +18,8 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/cpu/bias_add_grad_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -19,8 +19,8 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,9 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/cpu/concat_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "ir/primitive.h"
|
||||
#include "backend/kernel_compiler/cpu/concat_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -17,8 +17,8 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,7 +13,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -21,9 +21,9 @@
|
|||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <functional>
|
||||
#include "kernel/kernel.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "ir/anf.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
using mindspore::kernel::Address;
|
||||
using mindspore::kernel::AddressPtr;
|
||||
|
@ -55,7 +55,7 @@ class CPUKernel : public kernel::KernelMod {
|
|||
public:
|
||||
CPUKernel() = default;
|
||||
~CPUKernel() override = default;
|
||||
void Init(const CNodePtr &kernel_node);
|
||||
virtual void Init(const CNodePtr &kernel_node);
|
||||
virtual void InitKernel(const CNodePtr &kernel_node) = 0;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void * /*stream_ptr*/) override {
|
|
@ -14,13 +14,13 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "device/kernel_info.h"
|
||||
#include "runtime/device/kernel_info.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -38,7 +38,7 @@ void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr
|
|||
}
|
||||
|
||||
std::shared_ptr<CPUKernel> CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) {
|
||||
auto kernel_info = apply_kernel->kernel_info();
|
||||
auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info());
|
||||
MS_EXCEPTION_IF_NULL(kernel_info);
|
||||
const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info();
|
||||
MS_EXCEPTION_IF_NULL(kernel_build_Info);
|
|
@ -24,8 +24,8 @@
|
|||
#include <vector>
|
||||
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "device/cpu/kernel_select_cpu.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "runtime/device/cpu/kernel_select_cpu.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -62,10 +62,12 @@ class CPUKernelRegistrar {
|
|||
static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_reg(#OPNAME, ATTR, \
|
||||
[]() { return std::make_shared<OPCLASS>(); });
|
||||
|
||||
#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) \
|
||||
#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) MS_REG_CPU_KERNEL_T_(__COUNTER__, OPNAME, ATTR, OPCLASS, T)
|
||||
#define MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T)
|
||||
#define _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) \
|
||||
static_assert(std::is_base_of<CPUKernel, OPCLASS<T>>::value, " must be base of CPUKernel"); \
|
||||
static const CPUKernelRegistrar g_cpu_kernel_##OPNAME##_##T##_reg(#OPNAME, ATTR, \
|
||||
[]() { return std::make_shared<OPCLASS<T>>(); });
|
||||
static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_##OPNAME##_##T##_reg( \
|
||||
#OPNAME, ATTR, []() { return std::make_shared<OPCLASS<T>>(); });
|
||||
|
||||
#define MS_REG_CPU_KERNEL_T_S(OPNAME, ATTR, OPCLASS, T, S) \
|
||||
static_assert(std::is_base_of<CPUKernel, OPCLASS<T, S>>::value, " must be base of CPUKernel"); \
|
|
@ -13,8 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/debug_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/debug_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "common/utils.h"
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
#include "debug/debugger/debugger.h"
|
|
@ -18,8 +18,8 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -14,10 +14,9 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
#include <thread>
|
||||
#include "kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "device/cpu/mpi/mpi_adapter.h"
|
||||
#include "ir/primitive.h"
|
||||
#include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "runtime/device/cpu/mpi/mpi_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -17,8 +17,8 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -15,9 +15,9 @@
|
|||
*/
|
||||
#include <thread>
|
||||
#include <string>
|
||||
#include "kernel/cpu/embedding_look_up_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "device/cpu/mpi/mpi_adapter.h"
|
||||
#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "runtime/device/cpu/mpi/mpi_adapter.h"
|
||||
#include "ir/primitive.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -36,7 +36,9 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
|||
}
|
||||
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
axis_ = 4 - input_shape_.size();
|
||||
reduce_scatter_flag_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "reduce_scatter_flag");
|
||||
if (AnfAlgo::HasNodeAttr(kAttrReduceScatterFlag, kernel_node)) {
|
||||
reduce_scatter_flag_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrReduceScatterFlag);
|
||||
}
|
||||
#ifdef ENABLE_MPI
|
||||
if (reduce_scatter_flag_) {
|
||||
size_t gatherv2_out_lens = 1;
|
||||
|
@ -65,7 +67,9 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
|||
MS_LOG(EXCEPTION) << "Not Enable MPI, please build version with -M on when set reduce_scatter_flag true";
|
||||
}
|
||||
#endif
|
||||
offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "offset");
|
||||
if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) {
|
||||
offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, kAttrOffset);
|
||||
}
|
||||
CPUKernelUtils::ExpandDimsTo4(&input_shape_);
|
||||
CPUKernelUtils::ExpandDimsTo4(&output_shape_);
|
||||
}
|
|
@ -17,8 +17,8 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,8 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/equal_count_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -17,8 +17,8 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,9 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/gather_cpu_kernel.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "ir/primitive.h"
|
||||
#include "backend/kernel_compiler/cpu/gather_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -17,8 +17,8 @@
|
|||
#define MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,11 +13,11 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/conv2d_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h"
|
||||
#include <string>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,11 +13,11 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
|
||||
#include <string>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,10 +13,10 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
|
||||
#include <string>
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,11 +13,11 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/lstm_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h"
|
||||
#include <string>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -24,7 +24,7 @@
|
|||
#endif
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class LstmCPUKernel : public MKLCPUKernel {
|
|
@ -13,14 +13,14 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h"
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,12 +13,12 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/matmul_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h"
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "common/utils.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,12 +13,12 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -21,8 +21,8 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
#include "dnnl.hpp"
|
||||
#include "kernel/cpu/cpu_kernel.h"
|
||||
#include "kernel/cpu/cpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,7 +13,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "dnnl.hpp"
|
||||
|
|
@ -13,9 +13,9 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/mul_cpu_kernel.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,12 +13,12 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/pooling_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h"
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,13 +13,13 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -19,7 +19,7 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,9 +13,9 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/relu_cpu_kernel.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,9 +13,9 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/relu_grad_cpu_kernel.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
|
@ -13,9 +13,9 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/softmax_cpu_kernel.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
namespace mindspore {
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue