syn code for 0715

This commit is contained in:
changzherui 2020-07-15 23:16:31 +08:00
commit f4cb445ea8
2333 changed files with 43977 additions and 20337 deletions

View File

@ -17,6 +17,10 @@ else()
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
endif()
# When the ENABLE_PYTHON option is on, pass ENABLE_PYTHON to the compiler as a
# preprocessor definition so sources can test it with #ifdef.
if (ENABLE_PYTHON)
add_compile_definitions(ENABLE_PYTHON)
endif()
set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")

View File

@ -70,6 +70,22 @@ Alexey Shevlyakov, avakh, baihuawei, BowenK, buxue, caifubi, caojian05, Cathy Wo
Contributions of any kind are welcome!
# Release 0.3.1-alpha
## Major Features and Improvements
### Ascend 910 Training and Inference Framework
* Frontend and User Interface
* Independent model init interface.
* Data processing, augmentation, and save format
* Support sample padding for minddataset.
## Bugfixes
* Python API
* Fix bugs in the LARS optimizer ([!1894](https://gitee.com/mindspore/mindspore/pulls/1894))
* Data processing
* Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340))
# Release 0.3.0-alpha
## Major Features and Improvements

View File

@ -24,8 +24,8 @@ usage()
{
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
echo ""
echo "Options:"
echo " -d Debug mode"
@ -48,6 +48,7 @@ usage()
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
echo " -Q Enable dump memory, default off"
echo " -D Enable dumping of function graph ir, default on"
echo " -S Enable async data dump, default off"
echo " -z Compile dataset & mindrecord, default on"
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
echo " -V Specify the minimum required cuda version, default CUDA 10.1"
@ -56,6 +57,7 @@ usage()
echo " -s Enable serving module, default off"
echo " -B Enable debugger, default off"
echo " -E Enable IBVERBS for parameter server, default off"
echo " -l Compile with python dependency, default on"
}
# check value of input is 'on' or 'off'
@ -87,6 +89,7 @@ checkopts()
ENABLE_TIMELINE="off"
ENABLE_DUMP2PROTO="on"
ENABLE_DUMPE2E="off"
ENABLE_DATA_DUMP="off"
ENABLE_DUMP_IR="on"
COMPILE_MINDDATA="on"
ENABLE_MPI="off"
@ -98,9 +101,10 @@ checkopts()
ENABLE_SERVING="off"
ENABLE_DEBUGGER="off"
ENABLE_IBVERBS="off"
ENABLE_PYTHON="on"
# Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
@ -151,6 +155,10 @@ checkopts()
check_on_off $OPTARG p
ENABLE_PROFILE="$OPTARG"
;;
l)
check_on_off $OPTARG l
ENABLE_PYTHON="$OPTARG"
;;
i)
INC_BUILD="on"
;;
@ -212,6 +220,11 @@ checkopts()
ENABLE_DUMPE2E="$OPTARG"
echo "enable dump end to end"
;;
S)
check_on_off $OPTARG S
ENABLE_DATA_DUMP="$OPTARG"
echo "enable data dump"
;;
D)
check_on_off $OPTARG D
ENABLE_DUMP_IR="$OPTARG"
@ -315,7 +328,11 @@ build_mindspore()
if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
fi
if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
if [[ "X$ENABLE_MPI" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
fi

View File

@ -9,11 +9,11 @@ else()
LIBS ${LIB_ICU_COMMON} ${LIB_ICU_DATA} ${LIB_ICU_I18N}
URL https://github.com/unicode-org/icu/archive/release-67-1.tar.gz
MD5 0c2662a2b0bc80b0eb56495205247c8f
CONFIGURE_COMMAND ./icu4c/source/runConfigureICU Linux --enable-rpath --disable-tests --disable-samples --disable-icuio --disable-extras ICU_DATA_FILTER_FILE=${CMAKE_SOURCE_DIR}/third_party/icu4c/filter.json
CONFIGURE_COMMAND ${CMAKE_SOURCE_DIR}/scripts/build_icu4c.sh
)
include_directories(${icu4c_INC})
add_library(mindspore::icuuc ALIAS icu4c::${LIB_ICU_COMMON})
add_library(mindspore::icudata ALIAS icu4c::${LIB_ICU_DATA})
add_library(mindspore::icui18n ALIAS icu4c::${LIB_ICU_I18N})
add_definitions(-D ENABLE_ICU4C)
endif()
endif()

View File

@ -15,7 +15,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/json.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/dependency_securec.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
if (ENABLE_DEBUGGER)
if (ENABLE_DEBUGGER OR ENABLE_SERVING)
# build dependencies of gRPC
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/absl.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/c-ares.cmake)
@ -30,7 +30,7 @@ include(${CMAKE_SOURCE_DIR}/cmake/external_libs/flatbuffers.cmake)
if(USE_GLOG)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/glog.cmake)
endif()
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows")
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows" AND NOT ENABLE_GE)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zeromq.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pslite.cmake)
endif()

View File

@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
option(ENABLE_AKG "enable akg" OFF)
option(ENABLE_DEBUGGER "enable debugger" OFF)
option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
option(ENABLE_PYTHON "Enable python" ON)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (WIN32)
@ -115,6 +116,10 @@ if(ENABLE_DUMP_E2E)
add_compile_definitions(ENABLE_DUMP_E2E)
endif()
# When ENABLE_DATA_DUMP is set (build.sh passes -DENABLE_DATA_DUMP=ON for "-S on"),
# define the ENABLE_DATA_DUMP preprocessor macro for the compiled sources.
if(ENABLE_DATA_DUMP)
add_compile_definitions(ENABLE_DATA_DUMP)
endif()
if(ENABLE_DEBUGGER)
add_compile_definitions(ENABLE_DEBUGGER)
endif()

View File

@ -213,7 +213,6 @@ install(
${CMAKE_SOURCE_DIR}/mindspore/parallel
${CMAKE_SOURCE_DIR}/mindspore/mindrecord
${CMAKE_SOURCE_DIR}/mindspore/train
${CMAKE_SOURCE_DIR}/mindspore/model_zoo
${CMAKE_SOURCE_DIR}/mindspore/common
${CMAKE_SOURCE_DIR}/mindspore/ops
${CMAKE_SOURCE_DIR}/mindspore/communication
@ -261,3 +260,17 @@ if (EXISTS ${CMAKE_SOURCE_DIR}/mindspore/dataset)
COMPONENT mindspore
)
endif ()
# Install the serving-related targets only when the serving module is enabled.
# Both rules belong to the "mindspore" install component.
if (ENABLE_SERVING)
# The ms_serving target is installed directly under the install base directory.
install(
TARGETS ms_serving
DESTINATION ${INSTALL_BASE_DIR}
COMPONENT mindspore
)
# The inference target is installed into the library directory.
install(
TARGETS inference
DESTINATION ${INSTALL_LIB_DIR}
COMPONENT mindspore
)
endif ()

15
config/data_dump.json Normal file
View File

@ -0,0 +1,15 @@
{
"DumpSettings": {
"net_name": "ResNet50",
"mode": 1,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},
"DumpSettingsSpec": {
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"iteration": "specified iteration ",
"kernels": "op's full scope name which need to be dump"
}
}

383
config/op_info.config Normal file

File diff suppressed because one or more lines are too long

@ -1 +1 @@
Subproject commit 4084909d62c159da6ba316f61ad3d02a4857b34b
Subproject commit 31aa96ef41067a0ecdc4113ef245f8ede48f3457

View File

@ -20,7 +20,7 @@
#include <utility>
#include <vector>
#include <memory>
#include "ir/dtype/type_id.h"
#include "mindspore/core/ir/dtype/type_id.h"
namespace mindspore {
#define MS_API __attribute__((visibility("default")))

View File

@ -334,7 +334,7 @@ class Parser:
def __init__(self, fn: (types.FunctionType, types.MethodType), parse_method=None) -> None:
self.fn = fn
self.parse_method = parse_method
_, self.line_offset = inspect.getsourcelines(self.fn)
self.line_offset = 0
self.filename: str = inspect.getfile(self.fn)
# Used to resolve the function's globals Namespace.
@ -350,7 +350,8 @@ class Parser:
logger.debug("fn = %r", self.fn)
tree = None
if isinstance(self.fn, (types.FunctionType, types.MethodType)):
original_src = inspect.getsource(self.fn)
lines, self.line_offset = inspect.getsourcelines(self.fn)
original_src = ''.join(lines)
hexstr = hashlib.sha256(original_src.encode()).hexdigest()
tree = Parser.ast_cache.get(hexstr)
if not tree:

View File

@ -108,7 +108,8 @@ def enumerate_(x, start=0):
"""Enumerate list or tuple."""
x_type = F.typeof(x)
ret = ()
if check_is_tuple_or_list(x_type, "enumerate"):
op_name = "enumerate"
if check_is_tuple_or_list(x_type, op_name, "first input") and check_is_const_int(start, op_name, "start"):
ret = zip(range(start, start + len(x)), x)
return ret
@ -123,11 +124,22 @@ def while_cond(x):
@constexpr
def check_is_tuple_or_list(x, op_name):
def check_is_tuple_or_list(x, op_name, arg_name):
"""check whether x is list or tuple."""
if isinstance(x, (mstype.list_type, mstype.tuple_type)):
return True
raise TypeError(f"For '{op_name}', the input parameter should be tuple or list, but got {x}.")
raise TypeError(f"For '{op_name}', the '{arg_name}' should be tuple or list, but got {x}.")
@constexpr
def check_is_const_int(x, op_name, arg_name):
    """
    Check that `x` is a constant int.

    Args:
        x: Value to validate; `None` is treated as "not a constant".
        op_name (str): Operator name used in the error message.
        arg_name (str): Argument name used in the error message.

    Returns:
        bool, True when `x` is an int.

    Raises:
        TypeError: If `x` is None or is not an instance of int.
    """
    if x is None:
        raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got not const.")
    if isinstance(x, int):
        return True
    raise TypeError(f"For '{op_name}', the '{arg_name}' should be a const int number, but got {x}.")
@constexpr
def check_is_tensor_bool_cond(shp):

View File

@ -1,4 +1,5 @@
## common setting
include_directories(${CMAKE_SOURCE_DIR}/mindspore/core)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_BINARY_DIR})
link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
@ -35,20 +36,20 @@ if(ENABLE_GPU)
include_directories(${CUDNN_PATH} ${CUDA_PATH} ${CUDA_INCLUDE_DIRS})
file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"device/gpu/*.cc"
"device/gpu/*.cu"
"kernel/gpu/*.cu"
"kernel/akg/gpu/*.cc"
"kernel/akg/akg_kernel_build.cc"
"kernel/akg/akg_kernel_attrs_process.cc"
"runtime/device/gpu/*.cc"
"runtime/device/gpu/*.cu"
"backend/kernel_compiler/gpu/*.cu"
"backend/kernel_compiler/akg/gpu/*.cc"
"backend/kernel_compiler/akg/akg_kernel_build.cc"
"backend/kernel_compiler/akg/akg_kernel_attrs_process.cc"
)
list(APPEND CUDA_NVCC_FLAGS -arch=sm_53)
list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc")
list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc"
"device/gpu/distribution/collective_wrapper.cc"
"device/gpu/distribution/mpi_wrapper.cc"
"device/gpu/distribution/nccl_wrapper.cc"
list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc")
list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc"
"runtime/device/gpu/distribution/collective_wrapper.cc"
"runtime/device/gpu/distribution/mpi_wrapper.cc"
"runtime/device/gpu/distribution/nccl_wrapper.cc"
)
set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
@ -56,6 +57,7 @@ if(ENABLE_GPU)
set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
add_compile_definitions(ENABLE_GPU)
endif ()
## make flatuffer files
@ -101,16 +103,20 @@ if (ENABLE_DUMP_PROTO)
endif ()
if (ENABLE_D)
include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu")
include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto")
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "backend/kernel_compiler/aicpu/proto/*.proto")
ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "runtime/device/ascend/dump/proto/*.proto")
ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
add_compile_definitions(ENABLE_D)
endif ()
@ -121,18 +127,36 @@ if (MINDSPORE_PROTO_LIST)
endif()
## make sub objects
set(SUB_COMP
transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict
pybind_api pynative session utils vm
set(SUB_COMP
transform/graph_ir
transform/onnx
backend/optimizer
backend/kernel_compiler
backend/session
runtime/device
frontend/optimizer
frontend/parallel
frontend/operator
pipeline/jit
pipeline/pynative
common debug gvar predict pybind_api utils vm
)
foreach (_comp ${SUB_COMP})
add_subdirectory(${_comp})
if (TARGET _mindspore_${_comp}_obj)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${_comp}_obj>)
add_dependencies(_mindspore_${_comp}_obj proto_input flat_input)
string(REPLACE "/" "_" sub ${_comp})
if (TARGET _mindspore_${sub}_obj)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${sub}_obj>)
add_dependencies(_mindspore_${sub}_obj proto_input flat_input)
endif ()
endforeach ()
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/base base)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_base_obj>)
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/abstract abstract)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_abstract_obj>)
add_subdirectory(${CMAKE_SOURCE_DIR}/mindspore/core/ir ir)
list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_ir_obj>)
add_dependencies(_mindspore_base_obj _mindspore_ir_obj _mindspore_abstract_obj proto_input flat_input)
set_property(SOURCE ${SUB_OBJECTS_SRC} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_ME)
add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
@ -204,8 +228,8 @@ endif()
# set c_expression building
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set_property(SOURCE "pipeline/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
pybind11_add_module(_c_expression "pipeline/init.cc")
set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
pybind11_add_module(_c_expression "pipeline/jit/init.cc")
MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
@ -231,9 +255,11 @@ else ()
target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
target_link_libraries(_c_expression PRIVATE mindspore_gvar)
target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
if (${ENABLE_IBVERBS} STREQUAL "ON")
target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
if (NOT ENABLE_GE)
target_link_libraries(_c_expression PRIVATE mindspore::pslite mindspore::protobuf ${zeromq_DIRPATH}/zmq_install/lib/libzmq.a)
if (${ENABLE_IBVERBS} STREQUAL "ON")
target_link_libraries(_c_expression PRIVATE ibverbs rdmacm)
endif()
endif()
endif ()
@ -260,8 +286,8 @@ if (ENABLE_CPU)
endif ()
if (ENABLE_MINDDATA)
add_subdirectory(mindrecord)
add_subdirectory(dataset)
add_subdirectory(minddata/mindrecord)
add_subdirectory(minddata/dataset)
endif ()
# build inference
@ -270,7 +296,7 @@ set(LOAD_ONNX_SRC
${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc
)
add_library(inference SHARED
${CMAKE_CURRENT_SOURCE_DIR}/session/session.cc
${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc
${LOAD_ONNX_SRC}
)
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}

View File

@ -25,7 +25,15 @@ if (ENABLE_CPU)
file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"cpu/*.cc"
)
list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc"
"cpu/ps/pull_kernel.cc"
"cpu/ps/embedding_look_up_ps_kernel.cc"
"cpu/ps/embedding_look_up_proxy_kernel.cc"
"cpu/ps/apply_momentum_ps_kernel.cc"
"cpu/ps/sparse_apply_adam_ps_kernel.cc"
"cpu/ps/sparse_apply_ftrl_ps_kernel.cc")
if (NOT ENABLE_MPI)
list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
@ -55,4 +63,4 @@ endif()
set_property(SOURCE ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST}
PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_KERNEL)
add_library(_mindspore_kernel_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})
add_library(_mindspore_backend_kernel_compiler_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})

View File

@ -0,0 +1,312 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
#include <google/protobuf/text_format.h>
#include <fstream>
#include <utility>
#include <string>
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include "proto/tensor.pb.h"
#include "proto/tensor_shape.pb.h"
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "common/utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>;
// Computes the byte size of each of the first `input_num` inputs of `anf_node` and appends it to
// `input_size_list`.
//
// - String inputs (device data type kObjectTypeString): the size is the length of the constant string held
//   by the corresponding ValueNode input.
// - All other inputs: the size is the product of the device-shape dimensions multiplied by the element
//   byte width, computed through LongMulWithOverflowCheck.
//
// Returns false when the CNode has too few inputs for the requested index or when the element byte width
// is reported as 0; returns true otherwise. Null pointers and a non-CNode `anf_node` (for string inputs)
// are reported through MS_EXCEPTION_IF_NULL / MS_LOG(EXCEPTION).
bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num,
                   std::vector<size_t> *input_size_list) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(input_size_list);
  for (size_t i = 0; i < input_num; i++) {
    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
    if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {
      if (!anf_node->isa<CNode>()) {
        MS_LOG(EXCEPTION) << "anf_node is not CNode.";
      }
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      // The i-th input is read from inputs()[i + 1] below, so the vector must hold at least i + 2 entries.
      // (The previous check `size < i + 1` let `size == i + 1` through and indexed out of range.)
      if (cnode->inputs().size() <= (i + 1)) {
        MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 2;
        return false;
      }
      auto input_node = cnode->inputs()[i + 1];
      MS_EXCEPTION_IF_NULL(input_node);
      // NOTE(review): when the string input is not a ValueNode nothing is appended for this index, so the
      // resulting list is shorter than `input_num` — confirm this is intended.
      if (input_node->isa<ValueNode>()) {
        auto value_ptr = GetValueNode(input_node);
        auto value = GetValue<std::string>(value_ptr);
        input_size_list->push_back(value.size());
      }
    } else {
      auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
      MS_EXCEPTION_IF_NULL(type_ptr);
      // Element count = product of all dimensions (1 for an empty shape).
      int64_t size_i = 1;
      for (size_t j = 0; j < shape_i.size(); j++) {
        size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
      }
      size_t type_byte = GetTypeByte(type_ptr);
      if (type_byte == 0) {
        // A zero byte width is treated as a failure for the whole node.
        return false;
      }
      size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
      input_size_list->push_back(LongToSize(size_i));
    }
  }
  return true;
}
bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
if (!SetIOIputSize(anf_node, input_num, &input_size_list)) {
return false;
}
kernel_mod_ptr->SetInputSizeList(input_size_list);
for (size_t i = 0; i < output_num; i++) {
std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
MS_EXCEPTION_IF_NULL(type_ptr);
int64_t size_i = 1;
for (size_t j = 0; j < shape_i.size(); j++) {
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
}
size_t type_byte = GetTypeByte(type_ptr);
if (type_byte == 0) {
return false;
}
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
output_size_list.push_back(LongToSize(size_i));
}
kernel_mod_ptr->SetOutputSizeList(output_size_list);
return true;
}
// Converts a primitive attribute `value` into a protobuf AttrValue and stores it in `node_attr` under
// `attr_name`.
//
// `type` selects the conversion: "int", "str", "bool", "float" or "listInt". For "listInt" a value whose
// type prints as "Int32" is treated as a single-element list. Any other `type` string is reported through
// MS_LOG(EXCEPTION).
void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value,
                    ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) {
  MS_EXCEPTION_IF_NULL(node_attr);
  MS_EXCEPTION_IF_NULL(value);
  if (type == "int") {
    auto attr_value = GetValue<int>(value);
    (*node_attr)[attr_name].set_i(attr_value);
  } else if (type == "str") {
    auto attr_value = GetValue<std::string>(value);
    (*node_attr)[attr_name].set_s(attr_value);
  } else if (type == "bool") {
    auto attr_value = GetValue<bool>(value);
    (*node_attr)[attr_name].set_b(attr_value);
  } else if (type == "float") {
    auto attr_value = GetValue<float>(value);
    (*node_attr)[attr_name].set_f(attr_value);
  } else if (type == "listInt") {
    std::vector<int> attr_value;
    auto value_type = value->type();
    MS_EXCEPTION_IF_NULL(value_type);
    auto value_type_str = value_type->ToString();
    if (value_type_str == "Int32") {
      // A scalar int is accepted where a list is declared; wrap it into a one-element list.
      int data = GetValue<int>(value);
      attr_value.push_back(data);
    } else {
      attr_value = GetValue<std::vector<int>>(value);
    }
    mindspore::AttrValue input_shape_attr;
    mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();
    MS_EXCEPTION_IF_NULL(input_shape_attr_list);
    for (const auto shape : attr_value) {
      input_shape_attr_list->add_i(shape);
    }
    (*node_attr)[attr_name] = input_shape_attr;
  } else {
    // A leading space keeps the type name from running into the rest of the message.
    MS_LOG(EXCEPTION) << "type: " << type << " not support";
  }
}
void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(anf_node);
MS_EXCEPTION_IF_NULL(proto);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
if (op_name == kInitDataSetQueue) {
op_name = kInitData;
}
if (op_name == kPrint) {
return;
}
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
MS_EXCEPTION_IF_NULL(op_info_ptr);
auto attrs_ptr = op_info_ptr->attrs_ptr();
auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
MS_EXCEPTION_IF_NULL(primitive);
::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs();
for (const auto &attr_ptr : attrs_ptr) {
MS_EXCEPTION_IF_NULL(attr_ptr);
std::string attr_name = attr_ptr->name();
auto value = primitive->GetAttr(attr_name);
if (value != nullptr) {
if (attr_name == kQueueName || attr_name == kSharedName) {
attr_name = kChannelName;
} else if (attr_name == kSeed0) {
attr_name = kSeed;
} else if (attr_name == kSeed1) {
attr_name = kSeed2;
}
std::string type = attr_ptr->type();
ParseAttrValue(type, attr_name, value, node_attr);
}
}
MS_LOG(INFO) << "Set node attr end!";
}
void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(proto);
MS_EXCEPTION_IF_NULL(anf_node);
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
if (input_num == 0) {
MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input.";
return;
}
for (size_t input_index = 0; input_index < input_num; input_index++) {
::mindspore::Tensor *node_inputs = proto->add_inputs();
MS_EXCEPTION_IF_NULL(node_inputs);
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
std::vector<size_t> input_shape;
int32_t input_data_type;
if (input_type == kObjectTypeString) {
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
auto input_node = cnode->inputs()[input_index + 1];
auto value_ptr = GetValueNode(input_node);
auto value = GetValue<std::string>(value_ptr);
input_shape.push_back(1);
input_shape.push_back(value.size());
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
} else {
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
}
mindspore::TensorShape *tensorShape = node_inputs->mutable_tensor_shape();
for (auto item : input_shape) {
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
dim->set_size((::google::protobuf::int64)item);
}
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
node_inputs->set_mem_device("HBM");
}
}
void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
MS_EXCEPTION_IF_NULL(proto);
MS_EXCEPTION_IF_NULL(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
if (output_num == 0) {
MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. ";
return;
}
for (size_t output_index = 0; output_index < output_num; output_index++) {
::mindspore::Tensor *node_outputs = proto->add_outputs();
MS_EXCEPTION_IF_NULL(node_outputs);
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
MS_EXCEPTION_IF_NULL(tensorShape);
for (auto item : output_shape) {
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
MS_EXCEPTION_IF_NULL(dim);
dim->set_size((::google::protobuf::int64)item);
}
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
node_outputs->set_mem_device("HBM");
}
}
// Populates `proto` for `anf_node`: op name first (kInitDataSetQueue is written as kInitData), then the
// input tensors, the output tensors and finally the node attributes.
void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(proto);
  MS_LOG(INFO) << "SetNodedefProto entry";

  // Op name, with the dataset-queue op renamed.
  const std::string node_name = AnfAlgo::GetCNodeName(anf_node);
  const std::string proto_op = (node_name == kInitDataSetQueue) ? std::string(kInitData) : node_name;
  proto->set_op(proto_op);

  // Tensors and attributes, in the same order as before: inputs, outputs, attrs.
  SetNodeInputs(anf_node, proto);
  SetNodeOutputs(anf_node, proto);
  SetNodeAttr(anf_node, proto);

  MS_LOG(INFO) << "SetNodedefProto end!";
}
bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
MS_EXCEPTION_IF_NULL(anf_node);
MS_LOG(INFO) << "CreateNodeDefBytes entry";
mindspore::NodeDef proto;
SetNodedefProto(anf_node, &proto);
std::string nodeDefStr;
if (!proto.SerializeToString(&nodeDefStr)) {
MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
return false;
}
kernel_mod_ptr->SetNodeDef(nodeDefStr);
MS_LOG(INFO) << "CreateNodeDefBytes end!";
return true;
}
// Creates an AicpuOpKernelMod for `anf_node`: sets the node and node name (kInitDataSetQueue is named
// kInitData), attaches the serialized NodeDef and fills the input/output size lists. A failure in either
// of the last two steps is reported through MS_LOG(EXCEPTION).
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);

  const std::string cnode_name = AnfAlgo::GetCNodeName(anf_node);
  const std::string op_name = (cnode_name == kInitDataSetQueue) ? std::string(kInitData) : cnode_name;

  auto aicpu_mod = std::make_shared<AicpuOpKernelMod>();
  MS_EXCEPTION_IF_NULL(aicpu_mod);
  aicpu_mod->SetAnfNode(anf_node);
  aicpu_mod->SetNodeName(op_name);

  const bool node_def_ok = CreateNodeDefBytes(anf_node, aicpu_mod);
  if (!node_def_ok) {
    MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
  }
  const bool io_size_ok = SetIOSize(anf_node, aicpu_mod);
  if (!io_size_ok) {
    MS_LOG(EXCEPTION) << "Set input output size list failed.";
  }
  return aicpu_mod;
}
} // namespace kernel
} // namespace mindspore

View File

@ -16,7 +16,7 @@
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_KERNEL_BUILD_H_
#include <memory>
#include "kernel/kernel.h"
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -14,13 +14,13 @@
* limitations under the License.
*/
#include "kernel/aicpu/aicpu_kernel_metadata.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
#include <memory>
#include <string>
#include "kernel/oplib/oplib.h"
#include "kernel/common_utils.h"
#include "kernel/aicpu/aicpu_util.h"
#include "session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {

View File

@ -20,7 +20,7 @@
#include <string>
#include <vector>
#include <memory>
#include "kernel/kernel_build_info.h"
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "kernel/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include <memory>
#include <vector>
@ -23,9 +23,10 @@
#include "runtime/mem.h"
#include "runtime/rt.h"
#include "kernel/aicpu/aicpu_kernel_build.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
#include "utils/convert_utils.h"
#include "kernel/aicpu/aicpu_util.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "utils/context/ms_context.h"
using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
@ -144,8 +145,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
if (node_name_ == kTopK) {
node_name_ = kTopKV2;
}
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs);
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
return {task_info_ptr};

View File

@ -18,8 +18,8 @@
#include <vector>
#include <memory>
#include <string>
#include "kernel/ascend_kernel_mod.h"
#include "kernel/aicpu/aicpu_util.h"
#include "backend/kernel_compiler/ascend_kernel_mod.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
namespace mindspore {
namespace kernel {
class AicpuOpKernelMod : public AscendKernelMod {

View File

@ -13,14 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/aicpu/aicpu_util.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include <vector>
#include <string>
#include "proto/types.pb.h"
#include "runtime/mem.h"
#include "runtime/rt.h"
#include "utils/convert_utils.h"
#include "session/anf_runtime_algorithm.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {

View File

@ -0,0 +1,64 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_
#include <cstdint>
#include <vector>
#include <map>
#include <string>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
// String constants shared by the AICPU kernel code.
// NOTE(review): judging by the spellings, the CamelCase values look like operator names and the
// snake_case values look like attribute keys — confirm against the call sites before relying on that.
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
constexpr auto kInitData = "InitData";
constexpr auto kGetNext = "GetNext";
constexpr auto kPrint = "Print";
constexpr auto kPack = "Pack";
constexpr auto kOutputTypes = "output_types";
constexpr auto kOutputShapes = "output_shapes";
constexpr auto kChannelName = "channel_name";
constexpr auto kSharedName = "shared_name";
constexpr auto kShapes = "shapes";
constexpr auto kTypes = "types";
constexpr auto kQueueName = "queue_name";
constexpr auto kSeed = "seed";
// NOTE(review): kSeed0/kSeed1 are capitalised ("Seed0", "Seed1") while kSeed2 is lower-case ("seed2").
// The values are matched as strings, so verify the casing is intentional.
constexpr auto kSeed0 = "Seed0";
constexpr auto kSeed1 = "Seed1";
constexpr auto kSeed2 = "seed2";
constexpr auto kTopK = "TopK";
constexpr auto kTopKV2 = "TopKV2";
// Fixed-layout header describing an AICPU parameter block. The struct is declared packed, so the
// compiler inserts no padding between the fields; do not reorder or resize them.
// NOTE(review): presumably this header sits at the front of the argument buffer handed to an AICPU
// kernel — confirm against the code that fills it in.
struct AicpuParamHead {
uint32_t length; // Total length: include custom message
uint32_t ioAddrNum; // Input and output address number
uint32_t extInfoLength; // extInfo struct Length
uint64_t extInfoAddr; // extInfo address
} __attribute__((packed));
// Static helper utilities for AICPU op kernels; the class exposes no instance members.
class AicpuOpUtil {
public:
// Maps a MindSpore TypeId to an integer type code.
// NOTE(review): the name suggests the result is a protobuf data-type enum value; the definition is
// not in this header, so confirm there (including what is returned for unsupported types).
static int MsTypeToProtoType(TypeId ms_type);
private:
// kernel id
static uint64_t KernelId_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_AICPU_AICPU_UTIL_H_

View File

@ -13,11 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/akg/akg_kernel_attrs_process.h"
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
#include <algorithm>
#include "session/anf_runtime_algorithm.h"
#include "pre_activate/common/helper.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace kernel {

View File

@ -22,7 +22,7 @@
#include <unordered_map>
#include "ir/anf.h"
#include "utils/utils.h"
#include "operator/ops.h"
#include "frontend/operator/ops.h"
namespace mindspore {
namespace kernel {

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "kernel/akg/akg_kernel_build.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include <Python.h>
#include <sys/types.h>
#include <signal.h>
@ -35,8 +35,8 @@
#include "utils/convert_utils.h"
#include "utils/any.h"
#include "utils/utils.h"
#include "session/anf_runtime_algorithm.h"
#include "kernel/akg/akg_kernel_attrs_process.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
namespace mindspore {
namespace kernel {

View File

@ -22,11 +22,11 @@
#include <memory>
#include <map>
#include <utility>
#include "kernel/kernel.h"
#include "backend/kernel_compiler/kernel.h"
#include "ir/dtype.h"
#include <nlohmann/json.hpp>
#include "kernel/common_utils.h"
#include "kernel/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/oplib/oplib.h"
namespace mindspore {
namespace kernel {

View File

@ -14,11 +14,11 @@
* limitations under the License.
*/
#include "kernel/akg/akg_kernel_metadata.h"
#include "backend/kernel_compiler/akg/akg_kernel_metadata.h"
#include <memory>
#include "session/anf_runtime_algorithm.h"
#include "kernel/oplib/oplib.h"
#include "kernel/common_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {

View File

@ -21,7 +21,7 @@
#include <vector>
#include <unordered_map>
#include <memory>
#include "kernel/kernel_build_info.h"
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "kernel/akg/ascend/akg_ascend_kernel_build.h"
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h"
#include <algorithm>
#include <map>
@ -26,12 +26,12 @@
#include <Python.h>
#include "ir/dtype.h"
#include "ir/func_graph.h"
#include "kernel/kernel.h"
#include "kernel/common_utils.h"
#include "kernel/tbe/tbe_utils.h"
#include "kernel/akg/ascend/akg_ascend_kernel_mod.h"
#include "kernel/akg/akg_kernel_attrs_process.h"
#include "session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_attrs_process.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {

View File

@ -22,8 +22,8 @@
#include <vector>
#include <map>
#include "ir/anf.h"
#include "kernel/kernel.h"
#include "kernel/akg/akg_kernel_build.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
namespace mindspore {
namespace kernel {

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "kernel/akg/ascend/akg_ascend_kernel_mod.h"
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.h"
#include <algorithm>
#include <fstream>
#include <map>
@ -26,6 +26,7 @@
#include "runtime/rt.h"
#include "utils/log_adapter.h"
#include "utils/convert_utils.h"
#include "utils/context/ms_context.h"
namespace mindspore {
namespace kernel {
@ -123,8 +124,8 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in
MS_LOG(DEBUG) << "The block_dim is:" << block_dim;
TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs,
output_data_addrs, workspace_addrs);
kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data,
input_data_addrs, output_data_addrs, workspace_addrs, NeedDump());
return {task_info_ptr};
}
} // namespace kernel

View File

@ -19,8 +19,8 @@
#include <string>
#include <vector>
#include <memory>
#include "kernel/ascend_kernel_mod.h"
#include "kernel/tbe/tbe_utils.h"
#include "backend/kernel_compiler/ascend_kernel_mod.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
namespace mindspore {
namespace kernel {

View File

@ -14,12 +14,12 @@
* limitations under the License.
*/
#include "kernel/akg/gpu/akg_gpu_kernel_build.h"
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.h"
#include <vector>
#include <memory>
#include "kernel/kernel.h"
#include "kernel/akg/akg_kernel_build.h"
#include "kernel/akg/gpu/akg_gpu_kernel_mod.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
#include "common/utils.h"
namespace mindspore {

View File

@ -16,8 +16,8 @@
#ifndef MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
#define MINDSPORE_CCSRC_KERNEL_AKG_GPU_AKG_GPU_KERNEL_BUILD_H_
#include "kernel/kernel.h"
#include "ir/base.h"
#include "backend/kernel_compiler/kernel.h"
#include "base/base.h"
namespace mindspore {
namespace kernel {

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "kernel/akg/gpu/akg_gpu_kernel_mod.h"
#include "backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h"
#include <fstream>
#include <algorithm>
#include "nlohmann/json.hpp"

View File

@ -21,7 +21,7 @@
#include <vector>
#include <unordered_map>
#include <memory>
#include "kernel/kernel.h"
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -20,7 +20,10 @@
#include <vector>
#include <memory>
#include "framework/ge_runtime/task_info.h"
#include "kernel/kernel.h"
#include "backend/kernel_compiler/kernel.h"
#ifdef ENABLE_DATA_DUMP
#include "debug/data_dump_parser.h"
#endif
using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
namespace mindspore {
@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod {
const std::vector<AddressPtr> &, uint32_t) = 0;
uint32_t block_dim() { return block_dim_; }
uint32_t stream_id() { return stream_id_; }
// Reports whether data dump is requested for this kernel.
// Without ENABLE_DATA_DUMP the answer is always false; with it, the decision is delegated to
// DataDumpParser, looked up by kernel_name_.
virtual bool NeedDump() {
  bool need_dump = false;
#ifdef ENABLE_DATA_DUMP
  need_dump = DataDumpParser::GetInstance().NeedDump(kernel_name_);
#endif
  return need_dump;
}
protected:
uint32_t block_dim_{1};

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "kernel/common_utils.h"
#include "backend/kernel_compiler/common_utils.h"
#include <unordered_map>
#include <map>
#include <iostream>
@ -22,16 +22,18 @@
#include <fstream>
#include <thread>
#include "nlohmann/json.hpp"
#include "session/anf_runtime_algorithm.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "common/utils.h"
#include "ir/manager.h"
#include "ir/meta_tensor.h"
#include "ir/func_graph.h"
#include "operator/ops.h"
#include "frontend/operator/ops.h"
#include "utils/graph_utils.h"
namespace mindspore {
namespace kernel {
constexpr char kAxis[] = "axis";
constexpr char kTypeInt32[] = "Int32";
const std::unordered_map<std::string, TypeId> type_id_maps = {
{"float", TypeId::kNumberTypeFloat32}, {"float16", TypeId::kNumberTypeFloat16},
{"float32", TypeId::kNumberTypeFloat32}, {"float64", TypeId::kNumberTypeFloat64},
@ -579,8 +581,40 @@ void WorkerForReduceSparseGradient(WorkerParamsForReduceSparseGradient param) {
}
}
void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad,
size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices,
std::vector<size_t> *slice_positions) {
MS_LOG(DEBUG) << "Start";
size_t thread_num = 24;
if (slice_positions->size() < thread_num) {
thread_num = slice_positions->size();
}
size_t stride = (slice_positions->size() + thread_num - 1) / thread_num;
thread_num = (slice_positions->size() + stride - 1) / stride;
std::vector<std::thread> threads;
size_t max_length = sorted_indices->size() * outer_dim;
for (size_t i = 0; i < thread_num; ++i) {
size_t slice_start = i * stride;
size_t slice_end = 0;
if (i == thread_num - 1) {
slice_end = slice_positions->size();
} else {
slice_end = slice_start + stride;
}
WorkerParamsForReduceSparseGradient params{
slice_start, slice_end, max_length, outer_dim, sorted_indices, slice_positions, origin_sparse_grad.value_,
unique_grad};
threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params));
}
for (size_t i = 0; i < thread_num; ++i) {
threads[i].join();
}
MS_LOG(DEBUG) << "End";
}
void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim) {
size_t outer_dim, bool use_multi_threads) {
MS_LOG(DEBUG) << "Start";
MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
MS_EXCEPTION_IF_NULL(unique_grad);
@ -599,37 +633,102 @@ void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradie
[](const std::pair<int, size_t> &left, const std::pair<int, size_t> &right) { return left.first < right.first; });
int last_index = 0;
std::vector<size_t> slice_positions;
slice_positions.reserve(sorted_indices.size());
for (size_t i = 0; i < sorted_indices.size(); ++i) {
if (i == 0 || last_index != sorted_indices[i].first) {
slice_positions.emplace_back(i);
}
last_index = sorted_indices[i].first;
}
size_t thread_num = 8;
if (slice_positions.size() < thread_num) {
thread_num = slice_positions.size();
if (use_multi_threads) {
RunMultiThreadReduceSparseGradient(origin_sparse_grad, unique_grad, outer_dim, &sorted_indices, &slice_positions);
} else {
size_t max_length = sorted_indices.size() * outer_dim;
WorkerParamsForReduceSparseGradient params{0,
slice_positions.size(),
max_length,
outer_dim,
&sorted_indices,
&slice_positions,
origin_sparse_grad.value_,
unique_grad};
WorkerForReduceSparseGradient(params);
}
size_t stride = (slice_positions.size() + thread_num - 1) / thread_num;
thread_num = (slice_positions.size() + stride - 1) / stride;
std::vector<std::thread> threads;
size_t max_length = sorted_indices.size() * outer_dim;
for (size_t i = 0; i < thread_num; ++i) {
size_t slice_start = i * stride;
size_t slice_end = 0;
if (i == thread_num - 1) {
slice_end = slice_positions.size();
} else {
slice_end = slice_start + stride;
unique_grad->indices_size_ = slice_positions.size();
MS_LOG(DEBUG) << "End";
}
void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads,
SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim) {
MS_LOG(DEBUG) << "Start";
if (unique_slice_grads.empty()) {
return;
}
size_t index_data_size = outer_dim * sizeof(float);
size_t unique_indices_size = 0;
for (size_t i = 0; i < unique_slice_grads.size(); ++i) {
auto &slice_grad = unique_slice_grads[i];
auto ret_code = memcpy_s(tmp_grad->value_ + unique_indices_size * outer_dim,
(tmp_grad->indices_size_ - unique_indices_size) * index_data_size, slice_grad->value_,
slice_grad->indices_size_ * index_data_size);
if (ret_code != EOK) {
MS_LOG(EXCEPTION) << "Failed to copy data!";
}
WorkerParamsForReduceSparseGradient params{
slice_start, slice_end, max_length, outer_dim, &sorted_indices, &slice_positions, origin_sparse_grad.value_,
unique_grad};
threads.emplace_back(std::thread(WorkerForReduceSparseGradient, params));
ret_code =
memcpy_s(tmp_grad->indices_ + unique_indices_size, (tmp_grad->indices_size_ - unique_indices_size) * sizeof(int),
slice_grad->indices_, slice_grad->indices_size_ * sizeof(int));
if (ret_code != EOK) {
MS_LOG(EXCEPTION) << "Failed to copy data!";
}
unique_indices_size += slice_grad->indices_size_;
}
tmp_grad->indices_size_ = unique_indices_size;
ReduceSparseGradient(*tmp_grad, unique_grad, first_dim, outer_dim);
MS_LOG(DEBUG) << "End";
}
void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad,
SparseGradient *unique_grad, size_t first_dim, size_t outer_dim) {
MS_LOG(DEBUG) << "Start";
MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
MS_EXCEPTION_IF_NULL(unique_grad);
MS_EXCEPTION_IF_NULL(unique_grad->value_);
MS_EXCEPTION_IF_NULL(unique_grad->indices_);
MS_EXCEPTION_IF_NULL(tmp_grad);
MS_EXCEPTION_IF_NULL(tmp_grad->value_);
MS_EXCEPTION_IF_NULL(tmp_grad->indices_);
size_t thread_num = 24;
if (origin_sparse_grad.indices_size_ < thread_num) {
thread_num = origin_sparse_grad.indices_size_;
}
size_t thread_indices_size = origin_sparse_grad.indices_size_ / thread_num;
size_t left_indices_size = origin_sparse_grad.indices_size_ % thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
std::vector<std::shared_ptr<SparseGradient>> unique_slice_grads;
for (size_t i = 0; i < thread_num; ++i) {
size_t indices_size = thread_indices_size;
if (i == thread_num - 1) {
indices_size = thread_indices_size + left_indices_size;
}
size_t value_offset = i * thread_indices_size * outer_dim;
size_t indices_offset = i * thread_indices_size;
auto slice_grad = SparseGradient(
{origin_sparse_grad.value_ + value_offset, origin_sparse_grad.indices_ + indices_offset, indices_size});
unique_slice_grads.emplace_back(std::make_shared<SparseGradient>());
unique_slice_grads[i]->value_ = unique_grad->value_ + value_offset;
unique_slice_grads[i]->indices_ = unique_grad->indices_ + indices_offset;
unique_slice_grads[i]->indices_size_ = indices_size;
threads.emplace_back(
std::thread(ReduceSparseGradient, slice_grad, unique_slice_grads[i].get(), first_dim, outer_dim, false));
}
for (size_t i = 0; i < thread_num; ++i) {
threads[i].join();
}
unique_grad->indices_size_ = slice_positions.size();
ReduceMultiSparseGradient(unique_slice_grads, tmp_grad, unique_grad, first_dim, outer_dim);
MS_LOG(DEBUG) << "End";
}
std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index) {
@ -892,5 +991,39 @@ void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputePa
threads[i].join();
}
}
// Reads the "axis" attribute of a reduce node and returns it as a list of axes in which every
// negative entry has had the rank of the node's first input added to it. The resulting list is
// also stored back on the node under kAttrAxis.
//
// The attribute may hold a single Int32 or a list of ints. If the attribute is absent, an error is
// logged and an empty vector is returned (the node is left untouched).
// Raises if the input/output tensor-count check fails, or if the primitive or the attribute's
// type is null.
std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode) {
  const bool io_count_ok = AnfAlgo::GetInputTensorNum(cnode) == AnfAlgo::GetOutputTensorNum(cnode) ||
                           AnfAlgo::GetInputTensorNum(cnode) == 1;
  if (!io_count_ok) {
    MS_LOG(EXCEPTION) << "the kind of reduce node [" << cnode->DebugString()
                      << "] is not single input or single output ";
  }
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0);
  auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
  MS_EXCEPTION_IF_NULL(primitive);
  auto axis_attr = primitive->GetAttr(kAxis);
  if (axis_attr == nullptr) {
    MS_LOG(ERROR) << "This node does't have axie attr.";
    return {};
  }
  auto attr_type = axis_attr->type();
  MS_EXCEPTION_IF_NULL(attr_type);
  // Collect the raw axes, whether the attribute is a scalar or a list.
  std::vector<int> raw_axes;
  if (attr_type->ToString() != kTypeInt32) {
    raw_axes = GetValue<std::vector<int>>(axis_attr);
  } else {
    raw_axes.emplace_back(GetValue<int>(axis_attr));
  }
  // Shift negative axes by the input rank; non-negative axes pass through unchanged.
  std::vector<int> normalized;
  for (const auto &raw : raw_axes) {
    if (raw >= 0) {
      normalized.emplace_back(raw);
      continue;
    }
    normalized.emplace_back(input_shape.size() + raw);
  }
  AnfAlgo::SetNodeAttr(kAttrAxis, MakeValue(normalized), cnode);
  return normalized;
}
} // namespace kernel
} // namespace mindspore

View File

@ -26,9 +26,9 @@
#include <vector>
#include <utility>
#include <nlohmann/json.hpp>
#include "kernel/kernel.h"
#include "kernel/oplib/opinfo.h"
#include "kernel/kernel_build_info.h"
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/oplib/opinfo.h"
#include "backend/kernel_compiler/kernel_build_info.h"
namespace mindspore {
namespace kernel {
@ -115,7 +115,7 @@ int Sign(float x);
void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim);
void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim);
size_t outer_dim, bool use_multi_threads = true);
std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index);
std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list,
const std::vector<AnfNodePtr> &input_list);
@ -130,6 +130,15 @@ void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<An
bool IsWeightBoundary(const AnfNodePtr &node);
void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params,
size_t total_compute_size);
void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad,
size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices,
std::vector<size_t> *slice_positions);
void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads,
SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim,
size_t outer_dim);
void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad,
SparseGradient *unique_grad, size_t first_dim, size_t outer_dim);
std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode);
} // namespace kernel
} // namespace mindspore

View File

@ -14,9 +14,8 @@
* limitations under the License.
*/
#include "kernel/cpu/addn_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "ir/primitive.h"
#include "backend/kernel_compiler/cpu/addn_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -18,8 +18,8 @@
#define MINDSPORE_CCSRC_KERNEL_CPU_ADDN_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -13,10 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/allgather_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "device/cpu/mpi/mpi_adapter.h"
#include "ir/primitive.h"
#include "backend/kernel_compiler/cpu/allgather_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_adapter.h"
#include "utils/log_adapter.h"
namespace mindspore {

View File

@ -17,8 +17,8 @@
#define MINDSPORE_CCSRC_KERNEL_CPU_REDUCE_SCATTER_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/apply_momentum_cpu_kernel.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/apply_momentum_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
namespace mindspore {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/argmax_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/argmax_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -17,8 +17,8 @@
#define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "kernel/cpu/bias_add_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -18,8 +18,8 @@
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "kernel/cpu/bias_add_grad_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -19,8 +19,8 @@
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -14,9 +14,8 @@
* limitations under the License.
*/
#include "kernel/cpu/concat_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "ir/primitive.h"
#include "backend/kernel_compiler/cpu/concat_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -17,8 +17,8 @@
#define MINDSPORE_CCSRC_KERNEL_CPU_CONCAT_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -21,9 +21,9 @@
#include <memory>
#include <numeric>
#include <functional>
#include "kernel/kernel.h"
#include "backend/kernel_compiler/kernel.h"
#include "ir/anf.h"
#include "session/anf_runtime_algorithm.h"
#include "backend/session/anf_runtime_algorithm.h"
using mindspore::kernel::Address;
using mindspore::kernel::AddressPtr;
@ -55,7 +55,7 @@ class CPUKernel : public kernel::KernelMod {
public:
CPUKernel() = default;
~CPUKernel() override = default;
void Init(const CNodePtr &kernel_node);
virtual void Init(const CNodePtr &kernel_node);
virtual void InitKernel(const CNodePtr &kernel_node) = 0;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void * /*stream_ptr*/) override {

View File

@ -14,13 +14,13 @@
* limitations under the License.
*/
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
#include <memory>
#include <iostream>
#include <string>
#include "device/kernel_info.h"
#include "runtime/device/kernel_info.h"
namespace mindspore {
namespace kernel {
@ -38,7 +38,7 @@ void CPUKernelFactory::Register(const std::string &kernel_name, const KernelAttr
}
std::shared_ptr<CPUKernel> CPUKernelFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) {
auto kernel_info = apply_kernel->kernel_info();
auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info());
MS_EXCEPTION_IF_NULL(kernel_info);
const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info();
MS_EXCEPTION_IF_NULL(kernel_build_Info);

View File

@ -24,8 +24,8 @@
#include <vector>
#include "common/utils.h"
#include "kernel/cpu/cpu_kernel.h"
#include "device/cpu/kernel_select_cpu.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "runtime/device/cpu/kernel_select_cpu.h"
namespace mindspore {
namespace kernel {
@ -62,10 +62,12 @@ class CPUKernelRegistrar {
static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_reg(#OPNAME, ATTR, \
[]() { return std::make_shared<OPCLASS>(); });
#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) \
#define MS_REG_CPU_KERNEL_T(OPNAME, ATTR, OPCLASS, T) MS_REG_CPU_KERNEL_T_(__COUNTER__, OPNAME, ATTR, OPCLASS, T)
#define MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T)
#define _MS_REG_CPU_KERNEL_T_(COUNT, OPNAME, ATTR, OPCLASS, T) \
static_assert(std::is_base_of<CPUKernel, OPCLASS<T>>::value, " must be base of CPUKernel"); \
static const CPUKernelRegistrar g_cpu_kernel_##OPNAME##_##T##_reg(#OPNAME, ATTR, \
[]() { return std::make_shared<OPCLASS<T>>(); });
static const CPUKernelRegistrar g_cpu_kernel_##COUNT##_##OPNAME##_##T##_reg( \
#OPNAME, ATTR, []() { return std::make_shared<OPCLASS<T>>(); });
#define MS_REG_CPU_KERNEL_T_S(OPNAME, ATTR, OPCLASS, T, S) \
static_assert(std::is_base_of<CPUKernel, OPCLASS<T, S>>::value, " must be base of CPUKernel"); \

View File

@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/debug_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/debug_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"

View File

@ -18,8 +18,8 @@
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -14,10 +14,9 @@
* limitations under the License.
*/
#include <thread>
#include "kernel/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "device/cpu/mpi/mpi_adapter.h"
#include "ir/primitive.h"
#include "backend/kernel_compiler/cpu/embedding_look_up_comm_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_adapter.h"
namespace mindspore {
namespace kernel {

View File

@ -17,8 +17,8 @@
#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_COMM_GRAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -15,9 +15,9 @@
*/
#include <thread>
#include <string>
#include "kernel/cpu/embedding_look_up_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "device/cpu/mpi/mpi_adapter.h"
#include "backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/mpi/mpi_adapter.h"
#include "ir/primitive.h"
namespace mindspore {
@ -36,7 +36,9 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
}
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
axis_ = 4 - input_shape_.size();
reduce_scatter_flag_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "reduce_scatter_flag");
if (AnfAlgo::HasNodeAttr(kAttrReduceScatterFlag, kernel_node)) {
reduce_scatter_flag_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrReduceScatterFlag);
}
#ifdef ENABLE_MPI
if (reduce_scatter_flag_) {
size_t gatherv2_out_lens = 1;
@ -65,7 +67,9 @@ void EmbeddingLookUpCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_LOG(EXCEPTION) << "Not Enable MPI, please build version with -M on when set reduce_scatter_flag true";
}
#endif
offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "offset");
if (AnfAlgo::HasNodeAttr(kAttrOffset, kernel_node)) {
offset_ = AnfAlgo::GetNodeAttr<int>(kernel_node, kAttrOffset);
}
CPUKernelUtils::ExpandDimsTo4(&input_shape_);
CPUKernelUtils::ExpandDimsTo4(&output_shape_);
}

View File

@ -17,8 +17,8 @@
#define MINDSPORE_CCSRC_KERNEL_CPU_EMBEDDING_LOOK_UP_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/equal_count_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/equal_count_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -17,8 +17,8 @@
#define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -13,9 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/gather_cpu_kernel.h"
#include "device/cpu/cpu_device_address.h"
#include "ir/primitive.h"
#include "backend/kernel_compiler/cpu/gather_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -17,8 +17,8 @@
#define MINDSPORE_CCSRC_KERNEL_CPU_GATHER_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -13,11 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/conv2d_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h"
#include <string>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,11 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
#include <string>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
#include <string>
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
namespace mindspore {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,11 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/lstm_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h"
#include <string>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -24,7 +24,7 @@
#endif
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class LstmCPUKernel : public MKLCPUKernel {

View File

@ -13,14 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/lstm_grad_cpu_kernel.h"
#include <cstring>
#include <cmath>
#include <numeric>
#include <string>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,12 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/matmul_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.h"
#include <algorithm>
#include <utility>
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "common/utils.h"
#include "device/cpu/cpu_device_address.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,12 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
#include <vector>
#include <string>
#include <algorithm>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
namespace mindspore {
namespace kernel {

View File

@ -21,8 +21,8 @@
#include <memory>
#include <vector>
#include "dnnl.hpp"
#include "kernel/cpu/cpu_kernel.h"
#include "kernel/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {

View File

@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "utils/log_adapter.h"
#include "dnnl.hpp"

View File

@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/mul_cpu_kernel.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
namespace mindspore {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,12 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/pooling_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.h"
#include <string>
#include <algorithm>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,13 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h"
#include <string>
#include <utility>
#include <algorithm>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {

View File

@ -19,7 +19,7 @@
#include <vector>
#include <memory>
#include <utility>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/relu_cpu_kernel.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
namespace mindspore {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/relu_grad_cpu_kernel.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
namespace mindspore {

View File

@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {

View File

@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/softmax_cpu_kernel.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
#include "backend/kernel_compiler/cpu/mkldnn/softmax_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/utils.h"
namespace mindspore {

Some files were not shown because too many files have changed in this diff Show More