From 181b49f50c376cf5f9a2bea0a51c704787a7fe0c Mon Sep 17 00:00:00 2001 From: hangangqiang Date: Thu, 14 Jan 2021 19:21:29 +0800 Subject: [PATCH 01/10] add commit id info in package in mindspore lite --- build.bat | 1 + build.sh | 6 + cmake/package_lite.cmake | 324 +++++++++++------- .../kernel/opencl/kernel/fusion_eltwise.cc | 1 - 4 files changed, 208 insertions(+), 124 deletions(-) diff --git a/build.bat b/build.bat index 146f51d3c16..a72cdffba0c 100644 --- a/build.bat +++ b/build.bat @@ -78,6 +78,7 @@ IF NOT EXIST "%BUILD_PATH%/mindspore" ( cd %BUILD_PATH%/mindspore IF "%1%" == "lite" ( + (git log -1 | findstr "^commit") > %BUILD_PATH%\.commit_id cmake -DPLATFORM_ARM64=off -DSUPPORT_TRAIN=off ^ -DENABLE_TOOLS=on -DENABLE_CONVERTER=on -DBUILD_TESTCASES=off ^ -DCMAKE_BUILD_TYPE=Release -DSUPPORT_GPU=off -DBUILD_MINDDATA=off -DOFFLINE_COMPILE=off ^ diff --git a/build.sh b/build.sh index 172fc758be8..c64652d1544 100755 --- a/build.sh +++ b/build.sh @@ -509,6 +509,11 @@ get_version() { VERSION_STR=${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION} } +write_commit_file() { + COMMIT_STR=$(git log -1 | grep commit) + echo ${COMMIT_STR} > "${BASEPATH}/mindspore/lite/build/.commit_id" +} + build_lite() { get_version @@ -541,6 +546,7 @@ build_lite() fi mkdir -pv build cd build + write_commit_file BUILD_TYPE="Release" if [[ "${DEBUG_MODE}" == "on" ]]; then BUILD_TYPE="Debug" diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index e084bb33d95..f9a7c38219f 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -3,6 +3,8 @@ include(CMakePackageConfigHelpers) set(RUNTIME_PKG_NAME ${MAIN_DIR}-${RUNTIME_COMPONENT_NAME}) set(CONVERTER_PKG_NAME ${MAIN_DIR}-${CONVERTER_COMPONENT_NAME}) +set(RUNTIME_ROOT_DIR ${RUNTIME_PKG_NAME}/) +set(CONVERTER_ROOT_DIR ${CONVERTER_PKG_NAME}/) set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/lib) set(RUNTIME_INC_DIR ${RUNTIME_PKG_NAME}/include) set(CONVERTER_LIB_DIR ${CONVERTER_PKG_NAME}/lib) @@ -17,154 +19,230 @@ set(MIND_DATA_LIB_DIR ${RUNTIME_PKG_NAME}/minddata/lib) set(LIB_DIR_RUN_X86 ${RUNTIME_PKG_NAME}/lib) -if (BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") - install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE) - if (PLATFORM_ARM64) +if(BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") + install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE) + if(PLATFORM_ARM64) file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so) - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) - elseif (PLATFORM_ARM32) + elseif(PLATFORM_ARM32) file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so) - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES 
${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) - else () - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME}) - endif () -endif () - -if (BUILD_MINDDATA STREQUAL "lite") - install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - if (PLATFORM_ARM64) - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libjpeg.so DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) - elseif (PLATFORM_ARM32) - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libjpeg.so DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) - else () - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME}) - endif () -endif () - -if (BUILD_MINDDATA STREQUAL "lite_cv") - if (PLATFORM_ARM64) - install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - elseif (PLATFORM_ARM32) - install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - else () - install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - endif () -endif () - -if (PLATFORM_ARM64) - if (SUPPORT_NPU) - install(FILES ${DDK_LIB_PATH}/libhiai.so DESTINATION ${RUNTIME_PKG_NAME}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${DDK_LIB_PATH}/libhiai_ir.so DESTINATION 
${RUNTIME_PKG_NAME}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${RUNTIME_PKG_NAME}/third_party/hiai_ddk/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) + else() + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib RENAME libjpeg.so.62 + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib RENAME libturbojpeg.so.0 + COMPONENT ${RUNTIME_COMPONENT_NAME}) endif() - if (SUPPORT_TRAIN) - install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - else () - install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "train*" EXCLUDE) - endif () - install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.a DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${RUNTIME_INC_DIR}/ir/dtype COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/schema/model_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/schema/ops_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema COMPONENT ${RUNTIME_COMPONENT_NAME}) +endif() + +if(BUILD_MINDDATA STREQUAL "lite") + install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + if(PLATFORM_ARM64) + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libjpeg.so DESTINATION ${TURBO_DIR}/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so DESTINATION ${TURBO_DIR}/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + elseif(PLATFORM_ARM32) + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libjpeg.so DESTINATION ${TURBO_DIR}/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so DESTINATION ${TURBO_DIR}/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + else() + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libjpeg.so.62.3.0 + DESTINATION ${TURBO_DIR}/lib RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so.0.2.0 + DESTINATION ${TURBO_DIR}/lib RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() +endif() + +if(BUILD_MINDDATA STREQUAL "lite_cv") + if(PLATFORM_ARM64) + install(DIRECTORY 
${TOP_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv + DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so + DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + elseif(PLATFORM_ARM32) + install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv + DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + else() + install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv + DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() +endif() + +if(PLATFORM_ARM64) + install(FILES ${TOP_DIR}/mindspore/lite/build/.commit_id DESTINATION ${RUNTIME_PKG_NAME} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(SUPPORT_NPU) + install(FILES ${DDK_LIB_PATH}/libhiai.so DESTINATION ${RUNTIME_PKG_NAME}/third_party/hiai_ddk/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${DDK_LIB_PATH}/libhiai_ir.so DESTINATION ${RUNTIME_PKG_NAME}/third_party/hiai_ddk/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${RUNTIME_PKG_NAME}/third_party/hiai_ddk/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() + if(SUPPORT_TRAIN) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + else() + install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "train*" EXCLUDE) + endif() + install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${RUNTIME_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.a DESTINATION ${RUNTIME_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${RUNTIME_INC_DIR}/ir/dtype + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/schema/model_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/schema/ops_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema + COMPONENT ${RUNTIME_COMPONENT_NAME}) install(DIRECTORY ${flatbuffers_INC} DESTINATION ${FLATBF_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - if (ENABLE_TOOLS) + if(ENABLE_TOOLS) install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) - endif () -elseif (PLATFORM_ARM32) - if (SUPPORT_TRAIN) - install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - else () - install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "train*" EXCLUDE) - endif () - install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - 
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.a DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${RUNTIME_INC_DIR}/ir/dtype COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/schema/model_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/schema/ops_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() +elseif(PLATFORM_ARM32) + install(FILES ${TOP_DIR}/mindspore/lite/build/.commit_id DESTINATION ${RUNTIME_PKG_NAME} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(SUPPORT_TRAIN) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + else() + install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "train*" EXCLUDE) + endif() + install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${RUNTIME_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.a DESTINATION ${RUNTIME_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${RUNTIME_INC_DIR}/ir/dtype + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/schema/model_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/schema/ops_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema + COMPONENT ${RUNTIME_COMPONENT_NAME}) install(DIRECTORY ${flatbuffers_INC} DESTINATION ${FLATBF_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - if (ENABLE_TOOLS) + if(ENABLE_TOOLS) install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) - endif () -elseif (WIN32) + endif() +elseif(WIN32) + install(FILES ${TOP_DIR}/build/.commit_id DESTINATION ${RUNTIME_PKG_NAME} + COMPONENT ${RUNTIME_COMPONENT_NAME}) get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH) - file(GLOB LIB_LIST ${CXX_DIR}/libstdc++-6.dll ${CXX_DIR}/libwinpthread-1.dll ${CXX_DIR}/libssp-0.dll ${CXX_DIR}/libgcc_s_seh-1.dll) - if (ENABLE_CONVERTER) - install(TARGETS converter_lite RUNTIME DESTINATION ${CONVERTER_PKG_NAME}/converter COMPONENT ${CONVERTER_COMPONENT_NAME}) + file(GLOB LIB_LIST ${CXX_DIR}/libstdc++-6.dll ${CXX_DIR}/libwinpthread-1.dll + ${CXX_DIR}/libssp-0.dll ${CXX_DIR}/libgcc_s_seh-1.dll) + if(ENABLE_CONVERTER) + install(FILES ${TOP_DIR}/build/.commit_id DESTINATION ${CONVERTER_PKG_NAME} + COMPONENT ${CONVERTER_COMPONENT_NAME}) + install(TARGETS converter_lite RUNTIME DESTINATION ${CONVERTER_PKG_NAME}/converter + COMPONENT ${CONVERTER_COMPONENT_NAME}) install(FILES ${LIB_LIST} DESTINATION ${CONVERTER_PKG_NAME}/converter COMPONENT ${CONVERTER_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/build/mindspore/tools/converter/mindspore_core/gvar/libmindspore_gvar.dll DESTINATION ${CONVERTER_PKG_NAME}/converter COMPONENT ${CONVERTER_COMPONENT_NAME}) - install(FILES ${glog_LIBPATH}/../bin/libglog.dll DESTINATION ${CONVERTER_PKG_NAME}/converter COMPONENT ${CONVERTER_COMPONENT_NAME}) - endif () - if (ENABLE_TOOLS) + install(FILES ${TOP_DIR}/build/mindspore/tools/converter/mindspore_core/gvar/libmindspore_gvar.dll + 
DESTINATION ${CONVERTER_PKG_NAME}/converter COMPONENT ${CONVERTER_COMPONENT_NAME}) + install(FILES ${glog_LIBPATH}/../bin/libglog.dll DESTINATION ${CONVERTER_PKG_NAME}/converter + COMPONENT ${CONVERTER_COMPONENT_NAME}) + endif() + if(ENABLE_TOOLS) install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) install(FILES ${LIB_LIST} DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(DIRECTORY ${flatbuffers_INC} DESTINATION ${RUNTIME_PKG_NAME}/third_party/flatbuffers COMPONENT ${RUNTIME_COMPONENT_NAME}) - if (SUPPORT_TRAIN) - install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - else () - install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "train*" EXCLUDE) - endif () - install(FILES ${TOP_DIR}/build/mindspore/schema/model_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/build/mindspore/schema/ops_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${RUNTIME_INC_DIR}/ir/dtype COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(DIRECTORY ${flatbuffers_INC} DESTINATION ${RUNTIME_PKG_NAME}/third_party/flatbuffers + COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(SUPPORT_TRAIN) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + else() + install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "train*" EXCLUDE) + endif() + install(FILES ${TOP_DIR}/build/mindspore/schema/model_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/build/mindspore/schema/ops_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${RUNTIME_INC_DIR}/ir/dtype + COMPONENT ${RUNTIME_COMPONENT_NAME}) set(WIN_LIB_DIR_RUN_X86 ${RUNTIME_PKG_NAME}/benchmark) - install(FILES ${TOP_DIR}/build/mindspore/src/libmindspore-lite.a DESTINATION ${WIN_LIB_DIR_RUN_X86} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/build/mindspore/src/libmindspore-lite.dll.a DESTINATION ${WIN_LIB_DIR_RUN_X86} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/build/mindspore/src/libmindspore-lite.dll DESTINATION ${WIN_LIB_DIR_RUN_X86} COMPONENT ${RUNTIME_COMPONENT_NAME}) - endif () -else () - if (SUPPORT_TRAIN) - install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - else () - install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "train*" EXCLUDE) - endif () - install(FILES ${TOP_DIR}/mindspore/lite/build/schema/model_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/schema/ops_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h 
DESTINATION ${RUNTIME_INC_DIR}/ir/dtype COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/build/mindspore/src/libmindspore-lite.a DESTINATION ${WIN_LIB_DIR_RUN_X86} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/build/mindspore/src/libmindspore-lite.dll.a DESTINATION ${WIN_LIB_DIR_RUN_X86} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/build/mindspore/src/libmindspore-lite.dll DESTINATION ${WIN_LIB_DIR_RUN_X86} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() +else() + install(FILES ${TOP_DIR}/mindspore/lite/build/.commit_id DESTINATION ${RUNTIME_PKG_NAME} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(SUPPORT_TRAIN) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + else() + install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${RUNTIME_INC_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "train*" EXCLUDE) + endif() + install(FILES ${TOP_DIR}/mindspore/lite/build/schema/model_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/schema/ops_generated.h DESTINATION ${RUNTIME_INC_DIR}/schema + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${RUNTIME_INC_DIR}/ir/dtype + COMPONENT ${RUNTIME_COMPONENT_NAME}) install(DIRECTORY ${flatbuffers_INC} DESTINATION ${FLATBF_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.a DESTINATION ${RUNTIME_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) - if (ENABLE_CONVERTER) - install(TARGETS converter_lite RUNTIME DESTINATION ${CONVERTER_PKG_NAME}/converter COMPONENT ${CONVERTER_COMPONENT_NAME}) - install(FILES ${TOP_DIR}/mindspore/lite/build/tools/converter/mindspore_core/gvar/libmindspore_gvar.so DESTINATION ${CONVERTER_PKG_NAME}/lib COMPONENT ${CONVERTER_COMPONENT_NAME}) - install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 DESTINATION ${CONVERTER_PKG_NAME}/third_party/glog/lib RENAME libglog.so.0 COMPONENT ${CONVERTER_COMPONENT_NAME}) - endif () - if (ENABLE_TOOLS) + install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${RUNTIME_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.a DESTINATION ${RUNTIME_LIB_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(ENABLE_CONVERTER) + install(FILES ${TOP_DIR}/mindspore/lite/build/.commit_id DESTINATION ${CONVERTER_PKG_NAME} + COMPONENT ${CONVERTER_COMPONENT_NAME}) + install(TARGETS converter_lite RUNTIME DESTINATION ${CONVERTER_PKG_NAME}/converter + COMPONENT ${CONVERTER_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/tools/converter/mindspore_core/gvar/libmindspore_gvar.so + DESTINATION ${CONVERTER_PKG_NAME}/lib COMPONENT ${CONVERTER_COMPONENT_NAME}) + install(FILES ${glog_LIBPATH}/libglog.so.0.4.0 + DESTINATION ${CONVERTER_PKG_NAME}/third_party/glog/lib RENAME libglog.so.0 + COMPONENT ${CONVERTER_COMPONENT_NAME}) + endif() + if(ENABLE_TOOLS) install(TARGETS benchmark RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/benchmark COMPONENT ${RUNTIME_COMPONENT_NAME}) install(TARGETS cropper RUNTIME DESTINATION ${RUNTIME_PKG_NAME}/cropper COMPONENT ${RUNTIME_COMPONENT_NAME}) - install(FILES 
${TOP_DIR}/mindspore/lite/build/tools/cropper/cropper_mapping_cpu.cfg DESTINATION ${RUNTIME_PKG_NAME}/cropper COMPONENT ${RUNTIME_COMPONENT_NAME}) - endif () -endif () + install(FILES ${TOP_DIR}/mindspore/lite/build/tools/cropper/cropper_mapping_cpu.cfg + DESTINATION ${RUNTIME_PKG_NAME}/cropper COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() +endif() -if (CMAKE_SYSTEM_NAME MATCHES "Windows") +if(CMAKE_SYSTEM_NAME MATCHES "Windows") set(CPACK_GENERATOR ZIP) -else () +else() set(CPACK_GENERATOR TGZ) -endif () +endif() set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) -if (PLATFORM_ARM64 OR PLATFORM_ARM32) +if(PLATFORM_ARM64 OR PLATFORM_ARM32) set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME}) -else () +else() set(CPACK_COMPONENTS_ALL ${RUNTIME_COMPONENT_NAME} ${CONVERTER_COMPONENT_NAME}) -endif () +endif() set(CPACK_PACKAGE_FILE_NAME ${MAIN_DIR}) -if (WIN32) +if(WIN32) set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output) -else () +else() set(CPACK_PACKAGE_DIRECTORY ${TOP_DIR}/output/tmp) -endif () +endif() set(CPACK_PACKAGE_CHECKSUM SHA256) include(CPack) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc index 6c7c713516e..ca8189f7edc 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc @@ -47,7 +47,6 @@ std::pair CheckSupportOrCreateParam( LiteKernel *node, bool create_param = false, const std::map &replace_map = {}) { MS_ASSERT(node); - MS_ASSERT(param); PrimitiveType node_type = node->Type(); auto operator_ = static_cast(node_type); auto *op_parameter = reinterpret_cast(node)->GetParameter(); From 06b366fd77a8198bb09a7347734263ae5a49d754 Mon Sep 17 00:00:00 2001 From: ling Date: Thu, 14 Jan 2021 15:20:42 +0800 Subject: [PATCH 02/10] conv fp16 cast delete --- .../src/runtime/kernel/arm/fp16/concat_fp16.h | 4 -- .../kernel/arm/fp16/convolution_1x1_fp16.cc | 13 +------ .../kernel/arm/fp16/convolution_base_fp16.cc | 38 ++----------------- .../kernel/arm/fp16/convolution_base_fp16.h | 4 -- .../arm/fp16/convolution_depthwise_fp16.cc | 10 +---- .../convolution_depthwise_slidewindow_fp16.cc | 12 ++---- .../kernel/arm/fp16/convolution_fp16.cc | 13 ++----- .../arm/fp16/convolution_winograd_fp16.cc | 12 +----- .../arm/fp16/deconvolution_depthwise_fp16.cc | 12 ++---- .../kernel/arm/fp16/deconvolution_fp16.cc | 3 -- .../arm/fp16/deconvolution_winograd_fp16.cc | 3 -- .../src/runtime/kernel/arm/fp16/scale_fp16.cc | 32 +++------------- .../src/runtime/kernel/arm/fp16/scale_fp16.h | 2 - .../src/runtime/kernel/arm/fp16/stack_fp16.cc | 1 - .../src/runtime/kernel/arm/fp16/stack_fp16.h | 2 - 15 files changed, 24 insertions(+), 137 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h index 368646276cd..d05374468a5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h @@ -36,13 +36,9 @@ class ConcatFp16CPUKernel : public LiteKernel { : LiteKernel(parameter, inputs, outputs, ctx, primitive) { concat_param_ = reinterpret_cast(op_parameter_); } - ~ConcatFp16CPUKernel() = default; - int Init() override; - int ReSize() override; - int Run() override; private: diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index 0f93bdb4f02..d16052dd8ba 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -207,18 +207,12 @@ static int Convolution1x1Fp16RunHw(void *cdata, int task_id) { } int Convolution1x1FP16CPUKernel::Run() { - auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Get executor tensor failed."; - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); - return ret; - } + ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); pack_input_ = reinterpret_cast( ctx_->allocator->Malloc(matmul_param_->row_16_ * matmul_param_->deep_ * sizeof(float16_t))); if (pack_input_ == nullptr) { MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!"; - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); return RET_MEMORY_FAILED; } @@ -232,6 +226,7 @@ int Convolution1x1FP16CPUKernel::Run() { input_ptr_ = batch_in; } + int ret = RET_ERROR; if (multi_thread_by_hw_) { ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_); } else { @@ -240,16 +235,12 @@ int Convolution1x1FP16CPUKernel::Run() { } if (ret != RET_OK) { MS_LOG(ERROR) << "ParallelLaunch failed."; - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); ctx_->allocator->Free(pack_input_); pack_input_ = nullptr; return ret; } } - ConvolutionBaseFP16CPUKernel::IfCastOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); - ctx_->allocator->Free(pack_input_); pack_input_ = nullptr; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc index 435aa8d518e..6212d5b8e59 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc @@ -33,19 +33,10 @@ ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() { } int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() { - // ===================input====================// - auto input_tensor = in_tensors_.at(kInputIndex); - in_data_type_ = input_tensor->data_type(); - MS_ASSERT(in_data_type_ == kNumberTypeFloat32 || in_data_type_ == kNumberTypeFloat16); - - execute_input_ = ConvertInputFp32toFp16(input_tensor, context_); - - // ==================output====================// - auto out_tensor = out_tensors_.at(kOutputIndex); - out_data_type_ = out_tensor->data_type(); - MS_ASSERT(out_data_type_ == kNumberTypeFloat32 || out_data_type_ == kNumberTypeFloat16); - - execute_output_ = MallocOutputFp16(out_tensor, context_); + auto input_tensor = in_tensors_.at(0); + auto output_tensor = out_tensors_.at(0); + execute_input_ = reinterpret_cast(input_tensor->data_c()); + execute_output_ = reinterpret_cast(output_tensor->data_c()); return RET_OK; } @@ -78,25 +69,4 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() { } return RET_OK; } - -void ConvolutionBaseFP16CPUKernel::IfCastOutput() { - if (out_data_type_ == kNumberTypeFloat32) { - auto out_tensor = out_tensors_.at(kOutputIndex); - auto out_ele_num = out_tensor->ElementsNum(); - auto output_addr = reinterpret_cast(out_tensor->MutableData()); - Float16ToFloat32(execute_output_, output_addr, out_ele_num); - } -} - -void ConvolutionBaseFP16CPUKernel::FreeTmpBuffer() { - if (in_data_type_ == kNumberTypeFloat32) { - context_->allocator->Free(execute_input_); - execute_input_ = nullptr; - } - if (out_data_type_ == kNumberTypeFloat32) { - context_->allocator->Free(execute_output_); - execute_output_ = nullptr; - } -} - } // namespace 
mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h index 972795cd121..5805f92cec4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h @@ -38,16 +38,12 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { int RunImpl(int task_id) { return mindspore::lite::RET_OK; } virtual int GetExecuteTensor(); virtual int GetExecuteFilter(); - virtual void IfCastOutput(); - void FreeTmpBuffer(); protected: float16_t *fp16_weight_ = nullptr; float16_t *execute_input_ = nullptr; float16_t *execute_weight_ = nullptr; float16_t *execute_output_ = nullptr; - TypeId in_data_type_; - TypeId out_data_type_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc index 1605dc5c27e..04fa5aa80ae 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc @@ -114,19 +114,13 @@ static int ConvDwFp16Run(void *cdata, int task_id) { } int ConvolutionDepthwiseFp16CPUKernel::Run() { - auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Get Execute tensor failed."; - return ret; - } + ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_); + auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; } - ConvolutionBaseFP16CPUKernel::IfCastOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc index 07caca2f2a3..41797496237 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc @@ -149,13 +149,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { return ret; } - ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Get Execute tensor failed."; - FreePackedInputOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); - return ret; - } + ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); + if (need_align_) { PackNHWCToNHWC8Fp16(execute_input_, packed_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); @@ -172,8 +167,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); } - ConvolutionBaseFP16CPUKernel::IfCastOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); + FreePackedInputOutput(); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc index b8f289c63ff..cb039776353 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc @@ -128,17 +128,11 @@ static int ConvolutionFp16Impl(void *cdata, int task_id) { } int ConvolutionFP16CPUKernel::Run() { - auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Get Execute tensor failed."; - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); - return ret; - } + ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - ret = InitTmpBuffer(); + auto ret = InitTmpBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init tmp buffer failed."; - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); FreeTmpBuffer(); return RET_ERROR; } @@ -147,8 +141,7 @@ int ConvolutionFP16CPUKernel::Run() { if (ret != RET_OK) { MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]"; } - ConvolutionBaseFP16CPUKernel::IfCastOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); + FreeTmpBuffer(); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc index 5706c4c29cc..30a6d6f1330 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc @@ -195,17 +195,11 @@ static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) { } int ConvolutionWinogradFP16CPUKernel::Run() { - auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Get Execute tensor failed."; - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); - return ret; - } + ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - ret = InitTmpBuffer(); + auto ret = InitTmpBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init tmp buffer failed."; - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); FreeTmpBuffer(); return RET_ERROR; } @@ -215,8 +209,6 @@ int ConvolutionWinogradFP16CPUKernel::Run() { MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]"; } - ConvolutionBaseFP16CPUKernel::IfCastOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); FreeTmpBuffer(); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index b9f9fb1012c..9607a9c4e22 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -162,13 +162,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { return RET_ERROR; } - ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Get Execute tensor failed."; - FreePackedInputOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); - return ret; - } + ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); + if (need_align_) { PackNHWCToNHWC8Fp16(execute_input_, packed_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); @@ -189,8 +184,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); } - ConvolutionBaseFP16CPUKernel::IfCastOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); + FreePackedInputOutput(); return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index 
520d1885d9d..c65d64f26b6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -189,7 +189,6 @@ int DeConvolutionFp16CPUKernel::Run() { int error_code = InitRunBuf(); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]"; - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); FreeRunBuf(); return RET_ERROR; } @@ -206,8 +205,6 @@ int DeConvolutionFp16CPUKernel::Run() { } } - ConvolutionBaseFP16CPUKernel::IfCastOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); FreeRunBuf(); return error_code; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc index 44e13c23766..dd8faa8f2d2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc @@ -405,9 +405,6 @@ int DeConvWinogradFp16CPUKernel::Run() { ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp16Run, this, thread_num_hw_); } - ConvolutionBaseFP16CPUKernel::IfCastOutput(); - ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); - return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc index 74f4d6e01a9..9f86d141554 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc @@ -33,9 +33,6 @@ using mindspore::schema::PrimitiveType_Scale; namespace mindspore::kernel { int ScaleFp16CPUKernel::InitScaleOffset() { - auto input_tensor = in_tensors_.at(0); - malloc_input_ = input_tensor->data_type() == kNumberTypeFloat32; - auto scale_tensor = in_tensors_.at(1); malloc_scale_ = scale_tensor->data_type() == kNumberTypeFloat32; @@ -45,9 +42,6 @@ int ScaleFp16CPUKernel::InitScaleOffset() { auto offset_tensor = in_tensors_.at(2); malloc_offset_ = offset_tensor->data_type() == kNumberTypeFloat32; } - - auto output_tensor = out_tensors_.at(0); - malloc_output_ = output_tensor->data_type() == kNumberTypeFloat32; return RET_OK; } @@ -103,6 +97,11 @@ int ScaleFp16Run(void *cdata, int task_id) { } int ScaleFp16CPUKernel::Run() { + auto input_tensor = in_tensors_.at(0); + auto output_tensor = out_tensors_.at(0); + input_ = reinterpret_cast(input_tensor->MutableData()); + output_ = reinterpret_cast(output_tensor->MutableData()); + auto ret = InitScaleOffset(); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed."; @@ -123,20 +122,11 @@ int ScaleFp16CPUKernel::Run() { return RET_ERROR; } - // if output tensor is fp32, we need to transform - if (malloc_output_) { - auto out_tensor = out_tensors_.at(0); - Float16ToFloat32(output_, reinterpret_cast(out_tensor->MutableData()), out_tensor->ElementsNum()); - } FreeTmpBuffer(); return RET_OK; } int ScaleFp16CPUKernel::MallocAssignTmpBuffer() { - input_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_); - if (input_ == nullptr) { - return RET_ERROR; - } scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_); if (scale_ == nullptr) { return RET_ERROR; @@ -155,18 +145,10 @@ int ScaleFp16CPUKernel::MallocAssignTmpBuffer() { } memset(offset_, 0, in_tensors_.at(1)->ElementsNum() * sizeof(float16_t)); } - output_ = MallocOutputFp16(out_tensors_.at(0), context_); - if (output_ == nullptr) { - return RET_ERROR; - } return RET_OK; } 
void ScaleFp16CPUKernel::FreeTmpBuffer() { - if (malloc_input_ && input_ != nullptr) { - context_->allocator->Free(input_); - input_ = nullptr; - } if (malloc_scale_ && scale_ != nullptr) { context_->allocator->Free(scale_); scale_ = nullptr; @@ -175,10 +157,6 @@ void ScaleFp16CPUKernel::FreeTmpBuffer() { context_->allocator->Free(offset_); offset_ = nullptr; } - if (malloc_output_ && output_ != nullptr) { - context_->allocator->Free(output_); - output_ = nullptr; - } } REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Scale, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.h index 26da3846d07..a54b95c017a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.h @@ -43,10 +43,8 @@ class ScaleFp16CPUKernel : public ScaleCPUKernel { void FreeTmpBuffer(); private: - bool malloc_input_ = false; bool malloc_scale_ = false; bool malloc_offset_ = false; - bool malloc_output_ = false; float16_t *input_ = nullptr; float16_t *scale_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc index 8f34b522a87..2429b5aa822 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc @@ -29,7 +29,6 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Stack; namespace mindspore::kernel { - int StackFp16CPUKernel::Init() { if (!InferShapeDone()) { return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h index a6a19332f0b..776a87d2406 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h @@ -27,9 +27,7 @@ class StackFp16CPUKernel : public StackCPUKernel { const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : StackCPUKernel(parameter, inputs, outputs, ctx, primitive) {} - ~StackFp16CPUKernel() = default; - int Init() override; int Run() override; From 8476fe2c5e238a6cd798beca26f9a7b0d02c3d1b Mon Sep 17 00:00:00 2001 From: chenhaozhe Date: Wed, 13 Jan 2021 14:07:39 +0800 Subject: [PATCH 03/10] Infer SSD with decoder --- model_zoo/official/cv/ssd/README.md | 50 +++++++++---------- model_zoo/official/cv/ssd/eval.py | 5 +- model_zoo/official/cv/ssd/export.py | 4 +- .../cv/ssd/scripts/run_distribute_train.sh | 2 +- .../ssd/scripts/run_distribute_train_gpu.sh | 2 +- model_zoo/official/cv/ssd/src/dataset.py | 8 +-- model_zoo/official/cv/ssd/src/eval_utils.py | 2 - model_zoo/official/cv/ssd/src/ssd.py | 36 +++++++++++++ 8 files changed, 74 insertions(+), 35 deletions(-) diff --git a/model_zoo/official/cv/ssd/README.md b/model_zoo/official/cv/ssd/README.md index bd23cb5dace..889c77e4478 100644 --- a/model_zoo/official/cv/ssd/README.md +++ b/model_zoo/official/cv/ssd/README.md @@ -371,34 +371,34 @@ The ckpt_file parameter is required. 
#### Evaluation Performance -| Parameters | Ascend | GPU | -| -------------------------- | -------------------------------------------------------------| -------------------------------------------------------------| -| Model Version | SSD V1 | SSD V1 | -| Resource | Ascend 910 ;CPU 2.60GHz,192cores;Memory,755G | NV SMX2 V100-16G | -| uploaded Date | 09/15/2020 (month/day/year) | 09/24/2020 (month/day/year) | -| MindSpore Version | 1.0.0 | 1.0.0 | -| Dataset | COCO2017 | COCO2017 | -| Training Parameters | epoch = 500, batch_size = 32 | epoch = 800, batch_size = 32 | -| Optimizer | Momentum | Momentum | -| Loss Function | Sigmoid Cross Entropy,SmoothL1Loss | Sigmoid Cross Entropy,SmoothL1Loss | -| Speed | 8pcs: 90ms/step | 8pcs: 121ms/step | -| Total time | 8pcs: 4.81hours | 8pcs: 12.31hours | -| Parameters (M) | 34 | 34 | -| Scripts | | | +| Parameters | Ascend | GPU | Ascend | +| ------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | +| Model Version | SSD V1 | SSD V1 | SSD-Mobilenet-V1-Fpn | +| Resource | Ascend 910 ;CPU 2.60GHz,192cores;Memory,755G | NV SMX2 V100-16G | Ascend 910 ;CPU 2.60GHz,192cores;Memory,755G | +| uploaded Date | 09/15/2020 (month/day/year) | 09/24/2020 (month/day/year) | 01/13/2021 (month/day/year) | +| MindSpore Version | 1.0.0 | 1.0.0 | 1.1.0 | +| Dataset | COCO2017 | COCO2017 | COCO2017 | +| Training Parameters | epoch = 500, batch_size = 32 | epoch = 800, batch_size = 32 | epoch = 60, batch_size = 32 | +| Optimizer | Momentum | Momentum | Momentum | +| Loss Function | Sigmoid Cross Entropy,SmoothL1Loss | Sigmoid Cross Entropy,SmoothL1Loss | Sigmoid Cross Entropy,SmoothL1Loss | +| Speed | 8pcs: 90ms/step | 8pcs: 121ms/step | 8pcs: 547ms/step | +| Total time | 8pcs: 4.81hours | 8pcs: 12.31hours | 8pcs: 4.22hours | +| Parameters (M) | 34 | 34 | 48M | +| Scripts | | | | #### Inference Performance -| Parameters | Ascend | GPU | -| ------------------- | ----------------------------| ----------------------------| -| Model Version | SSD V1 | SSD V1 | -| Resource | Ascend 910 | GPU | -| Uploaded Date | 09/15/2020 (month/day/year) | 09/24/2020 (month/day/year) | -| MindSpore Version | 1.0.0 | 1.0.0 | -| Dataset | COCO2017 | COCO2017 | -| batch_size | 1 | 1 | -| outputs | mAP | mAP | -| Accuracy | IoU=0.50: 23.8% | IoU=0.50: 22.4% | -| Model for inference | 34M(.ckpt file) | 34M(.ckpt file) | +| Parameters | Ascend | GPU | Ascend | +| ------------------- | --------------------------- | --------------------------- | --------------------------- | +| Model Version | SSD V1 | SSD V1 | SSD-Mobilenet-V1-Fpn | +| Resource | Ascend 910 | GPU | Ascend 910 | +| Uploaded Date | 09/15/2020 (month/day/year) | 09/24/2020 (month/day/year) | 09/24/2020 (month/day/year) | +| MindSpore Version | 1.0.0 | 1.0.0 | 1.1.0 | +| Dataset | COCO2017 | COCO2017 | COCO2017 | +| batch_size | 1 | 1 | 1 | +| outputs | mAP | mAP | mAP | +| Accuracy | IoU=0.50: 23.8% | IoU=0.50: 22.4% | Iout=0.50: 30% | +| Model for inference | 34M(.ckpt file) | 34M(.ckpt file) | 48M(.ckpt file) | ## [Description of Random Situation](#contents) diff --git a/model_zoo/official/cv/ssd/eval.py b/model_zoo/official/cv/ssd/eval.py index 37e3f1efa29..0e06d08c4f8 100644 --- a/model_zoo/official/cv/ssd/eval.py +++ b/model_zoo/official/cv/ssd/eval.py @@ -21,10 +21,11 @@ import time import numpy as np from mindspore import 
context, Tensor from mindspore.train.serialization import load_checkpoint, load_param_into_net -from src.ssd import SSD300, ssd_mobilenet_v2, ssd_mobilenet_v1_fpn +from src.ssd import SSD300, SsdInferWithDecoder, ssd_mobilenet_v2, ssd_mobilenet_v1_fpn from src.dataset import create_ssd_dataset, create_mindrecord from src.config import config from src.eval_utils import metrics +from src.box_utils import default_boxes def ssd_eval(dataset_path, ckpt_path, anno_json): """SSD evaluation.""" @@ -35,6 +36,8 @@ def ssd_eval(dataset_path, ckpt_path, anno_json): net = SSD300(ssd_mobilenet_v2(), config, is_training=False) else: net = ssd_mobilenet_v1_fpn(config=config) + net = SsdInferWithDecoder(net, Tensor(default_boxes), config) + print("Load Checkpoint!") param_dict = load_checkpoint(ckpt_path) net.init_parameters_data() diff --git a/model_zoo/official/cv/ssd/export.py b/model_zoo/official/cv/ssd/export.py index cd31c5adc95..9a76420a211 100644 --- a/model_zoo/official/cv/ssd/export.py +++ b/model_zoo/official/cv/ssd/export.py @@ -19,8 +19,9 @@ import numpy as np import mindspore from mindspore import context, Tensor from mindspore.train.serialization import load_checkpoint, load_param_into_net, export -from src.ssd import SSD300, ssd_mobilenet_v2, ssd_mobilenet_v1_fpn +from src.ssd import SSD300, SsdInferWithDecoder, ssd_mobilenet_v2, ssd_mobilenet_v1_fpn from src.config import config +from src.box_utils import default_boxes parser = argparse.ArgumentParser(description='SSD export') parser.add_argument("--device_id", type=int, default=0, help="Device id") @@ -41,6 +42,7 @@ if __name__ == '__main__': net = SSD300(ssd_mobilenet_v2(), config, is_training=False) else: net = ssd_mobilenet_v1_fpn(config=config) + net = SsdInferWithDecoder(net, Tensor(default_boxes), config) param_dict = load_checkpoint(args.ckpt_file) net.init_parameters_data() diff --git a/model_zoo/official/cv/ssd/scripts/run_distribute_train.sh b/model_zoo/official/cv/ssd/scripts/run_distribute_train.sh index 73c0613ded0..f7a36945483 100644 --- a/model_zoo/official/cv/ssd/scripts/run_distribute_train.sh +++ b/model_zoo/official/cv/ssd/scripts/run_distribute_train.sh @@ -31,7 +31,7 @@ fi # Before start distribute train, first create mindrecord files. BASE_PATH=$(cd "`dirname $0`" || exit; pwd) cd $BASE_PATH/../ || exit -python train.py --only_create_dataset=True +python train.py --only_create_dataset=True --dataset=$4 echo "After running the scipt, the network runs in the background. The log will be generated in LOGx/log.txt" diff --git a/model_zoo/official/cv/ssd/scripts/run_distribute_train_gpu.sh b/model_zoo/official/cv/ssd/scripts/run_distribute_train_gpu.sh index 5f27a22d323..9277e3de698 100644 --- a/model_zoo/official/cv/ssd/scripts/run_distribute_train_gpu.sh +++ b/model_zoo/official/cv/ssd/scripts/run_distribute_train_gpu.sh @@ -31,7 +31,7 @@ fi # Before start distribute train, first create mindrecord files. BASE_PATH=$(cd "`dirname $0`" || exit; pwd) cd $BASE_PATH/../ || exit -python train.py --only_create_dataset=True --run_platform="GPU" +python train.py --only_create_dataset=True --run_platform="GPU" --dataset=$4 echo "After running the scipt, the network runs in the background. 
The log will be generated in LOG/log.txt" diff --git a/model_zoo/official/cv/ssd/src/dataset.py b/model_zoo/official/cv/ssd/src/dataset.py index 738bb397847..d9b8cbf2ee2 100644 --- a/model_zoo/official/cv/ssd/src/dataset.py +++ b/model_zoo/official/cv/ssd/src/dataset.py @@ -207,10 +207,10 @@ def create_voc_label(is_training): print(f'Label "{cls_name}" not in "{config.classes}"') continue bnd_box = obj.find('bndbox') - x_min = int(bnd_box.find('xmin').text) - 1 - y_min = int(bnd_box.find('ymin').text) - 1 - x_max = int(bnd_box.find('xmax').text) - 1 - y_max = int(bnd_box.find('ymax').text) - 1 + x_min = int(float(bnd_box.find('xmin').text)) - 1 + y_min = int(float(bnd_box.find('ymin').text)) - 1 + x_max = int(float(bnd_box.find('xmax').text)) - 1 + y_max = int(float(bnd_box.find('ymax').text)) - 1 labels.append([y_min, x_min, y_max, x_max, cls_map[cls_name]]) if not is_training: diff --git a/model_zoo/official/cv/ssd/src/eval_utils.py b/model_zoo/official/cv/ssd/src/eval_utils.py index 180069d185d..e8e01b32c00 100644 --- a/model_zoo/official/cv/ssd/src/eval_utils.py +++ b/model_zoo/official/cv/ssd/src/eval_utils.py @@ -17,7 +17,6 @@ import json import numpy as np from .config import config -from .box_utils import ssd_bboxes_decode def apply_nms(all_boxes, all_scores, thres, max_boxes): @@ -81,7 +80,6 @@ def metrics(pred_data, anno_json): img_id = sample['img_id'] h, w = sample['image_shape'] - pred_boxes = ssd_bboxes_decode(pred_boxes) final_boxes = [] final_label = [] final_score = [] diff --git a/model_zoo/official/cv/ssd/src/ssd.py b/model_zoo/official/cv/ssd/src/ssd.py index d9e4b82ddf1..ebaa9f69482 100644 --- a/model_zoo/official/cv/ssd/src/ssd.py +++ b/model_zoo/official/cv/ssd/src/ssd.py @@ -569,6 +569,42 @@ class SSDWithMobileNetV2(nn.Cell): return self.last_channel +class SsdInferWithDecoder(nn.Cell): + """ + SSD Infer wrapper to decode the bbox locations. + + Args: + network (Cell): the origin ssd infer network without bbox decoder. + default_boxes (Tensor): the default_boxes from anchor generator + config (dict): ssd config + Returns: + Tensor, the locations for bbox after decoder representing (y0,x0,y1,x1) + Tensor, the prediction labels. 
+ + """ + def __init__(self, network, default_boxes, config): + super(SsdInferWithDecoder, self).__init__() + self.network = network + self.default_boxes = default_boxes + self.prior_scaling_xy = config.prior_scaling[0] + self.prior_scaling_wh = config.prior_scaling[1] + + def construct(self, x): + pred_loc, pred_label = self.network(x) + + default_bbox_xy = self.default_boxes[..., :2] + default_bbox_wh = self.default_boxes[..., 2:] + pred_xy = pred_loc[..., :2] * self.prior_scaling_xy * default_bbox_wh + default_bbox_xy + pred_wh = P.Exp()(pred_loc[..., 2:] * self.prior_scaling_wh) * default_bbox_wh + + pred_xy_0 = pred_xy - pred_wh / 2.0 + pred_xy_1 = pred_xy + pred_wh / 2.0 + pred_xy = P.Concat(-1)((pred_xy_0, pred_xy_1)) + pred_xy = P.Maximum()(pred_xy, 0) + pred_xy = P.Minimum()(pred_xy, 1) + return pred_xy, pred_label + + def ssd_mobilenet_v1_fpn(**kwargs): return SsdMobilenetV1Fpn(**kwargs) From 289f85695508511b26756d0902986c05718031c9 Mon Sep 17 00:00:00 2001 From: zhaoting Date: Fri, 15 Jan 2021 11:22:42 +0800 Subject: [PATCH 04/10] fix README link --- model_zoo/official/cv/centerface/README.md | 2 +- model_zoo/official/cv/deeptext/README.md | 2 +- model_zoo/official/cv/psenet/README.md | 2 +- model_zoo/official/cv/psenet/README_CN.md | 2 +- model_zoo/official/nlp/fasttext/README.md | 2 +- model_zoo/official/nlp/gnmt_v2/README.md | 2 +- model_zoo/official/nlp/mass/README_CN.md | 2 +- model_zoo/official/nlp/prophetnet/README.md | 2 +- model_zoo/research/audio/fcn-4/README.md | 2 +- model_zoo/research/cv/centernet/README.md | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/model_zoo/official/cv/centerface/README.md b/model_zoo/official/cv/centerface/README.md index 0ad5cd6028e..7650f574097 100644 --- a/model_zoo/official/cv/centerface/README.md +++ b/model_zoo/official/cv/centerface/README.md @@ -84,7 +84,7 @@ other datasets need to use the same format as WiderFace. - Hardware(Ascend) - Prepare hardware environment with Ascend processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. 
- Framework - - [MindSpore](https://cmc-szv.clouddragon.huawei.com/cmcversion/index/search?searchKey=Do-MindSpore%20V100R001C00B622) + - [MindSpore](https://www.mindspore.cn/install/en) - For more information, please check the resources below: - [MindSpore tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) diff --git a/model_zoo/official/cv/deeptext/README.md b/model_zoo/official/cv/deeptext/README.md index 29496af748c..d5fa94c80ea 100644 --- a/model_zoo/official/cv/deeptext/README.md +++ b/model_zoo/official/cv/deeptext/README.md @@ -188,7 +188,7 @@ class 1 precision is 88.01%, recall is 82.77% | Loss | ~0.008 | | Accuracy (8p) | precision=0.8854, recall=0.8024 | | Total time (8p) | 4h | -| Scripts | [deeptext script](https://gitee.com/mindspore/mindspore/tree/r1.1/mindspore/official/cv/deeptext) | +| Scripts | [deeptext script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/deeptext) | #### Inference Performance diff --git a/model_zoo/official/cv/psenet/README.md b/model_zoo/official/cv/psenet/README.md index c2a2e17fd54..5b473f888ad 100644 --- a/model_zoo/official/cv/psenet/README.md +++ b/model_zoo/official/cv/psenet/README.md @@ -197,7 +197,7 @@ Calculated!{"precision": 0.814796668299853, "recall": 0.8006740491092923, "hmean | Total time | 1pc: 75.48 h; 8pcs: 10.01 h | | Parameters (M) | 27.36 | | Checkpoint for Fine tuning | 109.44M (.ckpt file) | -| Scripts | | +| Scripts | | ### Inference Performance diff --git a/model_zoo/official/cv/psenet/README_CN.md b/model_zoo/official/cv/psenet/README_CN.md index 20e4dce55b1..9e622fef963 100644 --- a/model_zoo/official/cv/psenet/README_CN.md +++ b/model_zoo/official/cv/psenet/README_CN.md @@ -195,7 +195,7 @@ Calculated!{"precision": 0.8147966668299853,"recall":0.8006740491092923,"h | 总时间 | 1卡:75.48小时;4卡:18.87小时| | 参数(M) | 27.36 | | 微调检查点 | 109.44M (.ckpt file) | -| 脚本 | | +| 脚本 | | ### 推理性能 diff --git a/model_zoo/official/nlp/fasttext/README.md b/model_zoo/official/nlp/fasttext/README.md index 682dedc3497..276ac4c99d0 100644 --- a/model_zoo/official/nlp/fasttext/README.md +++ b/model_zoo/official/nlp/fasttext/README.md @@ -1,4 +1,4 @@ -![](https://www.mindspore.cn/static/img/logo.a3e472c9.png) +![](https://www.mindspore.cn/static/img/logo_black.6a5c850d.png) diff --git a/model_zoo/official/nlp/gnmt_v2/README.md b/model_zoo/official/nlp/gnmt_v2/README.md index 618ab203ba6..89a6ef9d2dc 100644 --- a/model_zoo/official/nlp/gnmt_v2/README.md +++ b/model_zoo/official/nlp/gnmt_v2/README.md @@ -1,4 +1,4 @@ -![](https://www.mindspore.cn/static/img/logo.a3e472c9.png) +![](https://www.mindspore.cn/static/img/logo_black.6a5c850d.png) diff --git a/model_zoo/official/nlp/mass/README_CN.md b/model_zoo/official/nlp/mass/README_CN.md index e4e7e8ffd45..a07f812b44f 100644 --- a/model_zoo/official/nlp/mass/README_CN.md +++ b/model_zoo/official/nlp/mass/README_CN.md @@ -47,7 +47,7 @@ BERT(Devlin等人,2018年)采用有屏蔽的语料丰富文本预训练Tra 受BERT、GPT及其他语言模型的启发,微软致力于在此基础上研究[掩式序列到序列(MASS)预训练语言生成](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf)。MASS的参数k很重要,用来控制屏蔽后的分片长度。BERT和GPT属于特例,k等于1或者句长。 -[MASS介绍 — 序列对序列语言生成任务中性能优于BERT和GPT的预训练方法](https://www.microsoft.com/en-us/research/blog/introduction-mass-a-pre-training-method-thing-forts-bert-and-gpt-in-sequence-to-sequence-language-generate-tasks/) +[MASS介绍 — 
序列对序列语言生成任务中性能优于BERT和GPT的预训练方法](https://www.microsoft.com/en-us/research/blog/introducing-mass-a-pre-training-method-that-outperforms-bert-and-gpt-in-sequence-to-sequence-language-generation-tasks/) [论文](https://www.microsoft.com/en-us/research/uploads/prod/2019/06/MASS-paper-updated-002.pdf): Song, Kaitao, Xu Tan, Tao Qin, Jianfeng Lu and Tie-Yan Liu.“MASS: Masked Sequence to Sequence Pre-training for Language Generation.”ICML (2019). diff --git a/model_zoo/official/nlp/prophetnet/README.md b/model_zoo/official/nlp/prophetnet/README.md index 141b008584f..b4f2a3c7420 100644 --- a/model_zoo/official/nlp/prophetnet/README.md +++ b/model_zoo/official/nlp/prophetnet/README.md @@ -655,4 +655,4 @@ The model has been validated on Ascend environment, not validated on CPU and GPU # ModelZoo Homepage - [Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo) + [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo) diff --git a/model_zoo/research/audio/fcn-4/README.md b/model_zoo/research/audio/fcn-4/README.md index dfd03595995..5d0fe4ddffa 100644 --- a/model_zoo/research/audio/fcn-4/README.md +++ b/model_zoo/research/audio/fcn-4/README.md @@ -192,7 +192,7 @@ Parameters for both training and evaluation can be set in config.py | Speed | 1pc: 160 samples/sec; | | Total time | 1pc: 20 mins; | | Checkpoint for Fine tuning | 198.73M(.ckpt file) | -| Scripts | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/audio/fcn-4) | +| Scripts | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) | ## [ModelZoo Homepage](#contents) diff --git a/model_zoo/research/cv/centernet/README.md b/model_zoo/research/cv/centernet/README.md index ef31c84f0a9..5ff91b0b222 100644 --- a/model_zoo/research/cv/centernet/README.md +++ b/model_zoo/research/cv/centernet/README.md @@ -79,7 +79,7 @@ Dataset used: [COCO2017](https://cocodataset.org/) - Hardware(Ascend) - Prepare hardware environment with Ascend processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. 
- Framework - - [MindSpore](https://cmc-szv.clouddragon.huawei.com/cmcversion/index/search?searchKey=Do-MindSpore%20V100R001C00B622) + - [MindSpore](https://www.mindspore.cn/install/en) - For more information, please check the resources below: - [MindSpore tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) From 3a6d63ab02129053b347b1f92834863b80b678e2 Mon Sep 17 00:00:00 2001 From: zhaoting Date: Tue, 5 Jan 2021 17:32:57 +0800 Subject: [PATCH 05/10] add some CPU operators --- .../cpu/arithmetic_self_cpu_kernel.cc | 14 ++ .../cpu/arithmetic_self_cpu_kernel.h | 2 + .../backend/kernel_compiler/cpu/cpu_kernel.h | 2 + .../cpu/eltwise_grad_cpu_kernel.cc | 16 ++ .../cpu/eltwise_grad_cpu_kernel.h | 9 + .../cpu/layer_norm_cpu_kernel.cc | 105 +++++++++ .../cpu/layer_norm_cpu_kernel.h | 70 ++++++ .../cpu/layer_norm_grad_cpu_kernel.cc | 124 ++++++++++ .../cpu/layer_norm_grad_cpu_kernel.h | 76 ++++++ tests/st/ops/cpu/test_gelu_grad_op.py | 63 +++++ tests/st/ops/cpu/test_gelu_op.py | 93 ++++++++ tests/st/ops/cpu/test_layer_norm_grad_op.py | 221 ++++++++++++++++++ tests/st/ops/cpu/test_layer_norm_op.py | 199 ++++++++++++++++ 13 files changed, 994 insertions(+) create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.cc create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.cc create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h create mode 100644 tests/st/ops/cpu/test_gelu_grad_op.py create mode 100644 tests/st/ops/cpu/test_gelu_op.py create mode 100644 tests/st/ops/cpu/test_layer_norm_grad_op.py create mode 100644 tests/st/ops/cpu/test_layer_norm_op.py diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc index 883cc7ff221..f7b4ad7bb6c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc @@ -76,6 +76,16 @@ void Reciprocal(const T *in, T *out, size_t start, size_t end) { out[i] = static_cast(1.0 / in[i]); } } + +template +void Gelu(const T *in, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + T x = in[i]; + auto double_x = static_cast(x); + T tanh_res = (T)std::tanh(0.7978845608 * (double_x + 0.044715 * double_x * double_x * double_x)); + out[i] = x * ((T)1.0 + tanh_res) / (T)2.0; + } +} } // namespace void ArithmeticSelfCPUKernel::InitKernel(const CNodePtr &kernel_node) { @@ -95,6 +105,8 @@ void ArithmeticSelfCPUKernel::InitKernel(const CNodePtr &kernel_node) { operate_type_ = FLOOR; } else if (kernel_name == prim::kPrimReciprocal->name()) { operate_type_ = RECIPROCAL; + } else if (kernel_name == prim::kPrimGelu->name()) { + operate_type_ = GELU; } dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); } @@ -150,6 +162,8 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector &inputs threads.emplace_back(std::thread(Floor, input, output, start, end)); } else if (operate_type_ == RECIPROCAL) { threads.emplace_back(std::thread(Reciprocal, input, output, start, end)); + } else if (operate_type_ == GELU) { + threads.emplace_back(std::thread(Gelu, input, output, start, end)); } start += once_compute_size; } diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h index 51f88f41036..db7c99c90b6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h @@ -62,6 +62,8 @@ MS_REG_CPU_KERNEL(Floor, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutput ArithmeticSelfCPUKernel); MS_REG_CPU_KERNEL(Reciprocal, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ArithmeticSelfCPUKernel); +MS_REG_CPU_KERNEL(Gelu, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ArithmeticSelfCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h index b097e6f40c6..f636ab84b84 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h @@ -88,6 +88,8 @@ enum OperateType { GREATER, GREATEREQUAL, RECIPROCAL, + GELU, + GELUGRAD, }; class CPUKernel : public kernel::KernelMod { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc index a8e53d93807..ee7a47c91a5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc @@ -78,6 +78,18 @@ void EltWiseGradCPUKernel::TanhGrad(const T *input1, const T *input2, T *out, si } } +template +void EltWiseGradCPUKernel::GeluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + T x = input2[i]; + auto double_x = static_cast(x); + T tanh_res = (T)std::tanh(0.7978845608 * (double_x + 0.044715 * double_x * double_x * double_x)); + T mul_right = (T)(0.7978845608 + 0.1070322244 * double_x * double_x); + T y_res = (((T)1.0 + tanh_res) + x * ((T)1.0 - tanh_res * tanh_res) * mul_right) / (T)2.0; + out[i] = input1[i] * y_res; + } +} + void EltWiseGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); @@ -93,6 +105,8 @@ void EltWiseGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { operate_type_ = TANHGRAD; } else if (kernel_name == "SqrtGrad") { operate_type_ = SQRTGRAD; + } else if (kernel_name == "GeluGrad") { + operate_type_ = GELUGRAD; } else { MS_LOG(EXCEPTION) << "Not support " << kernel_name; } @@ -172,6 +186,8 @@ void EltWiseGradCPUKernel::LaunchKernel(const std::vector &inputs, c threads.emplace_back(std::thread(&EltWiseGradCPUKernel::TanhGrad, this, input1, input2, output, start, end)); } else if (operate_type_ == SQRTGRAD) { threads.emplace_back(std::thread(&EltWiseGradCPUKernel::SqrtGrad, this, input1, input2, output, start, end)); + } else if (operate_type_ == GELUGRAD) { + threads.emplace_back(std::thread(&EltWiseGradCPUKernel::GeluGrad, this, input1, input2, output, start, end)); } else { MS_LOG(EXCEPTION) << "Not support " << operate_type_; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h index dcfe0050daf..b67c632654b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h @@ -47,6 +47,8 @@ class EltWiseGradCPUKernel : public CPUKernel { void SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); template void TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); + template + void GeluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); std::vector input_shape0_; std::vector input_shape1_; std::vector input_element_num0_; @@ -81,6 +83,13 @@ MS_REG_CPU_KERNEL( TanhGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), EltWiseGradCPUKernel); +MS_REG_CPU_KERNEL(GeluGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + EltWiseGradCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.cc new file mode 100644 index 00000000000..353ee5d4bd1 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.cc @@ -0,0 +1,105 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +void LayerNormCPUKernel::InitKernel(const CNodePtr &kernel_node) { + CheckParam(kernel_node); + dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); + std::vector x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto begin_norm_axis = AnfAlgo::GetNodeAttr(kernel_node, "begin_norm_axis"); + auto begin_params_axis = AnfAlgo::GetNodeAttr(kernel_node, "begin_params_axis"); + if (begin_norm_axis < 0) { + begin_norm_axis += x_shape.size(); + } + if (begin_params_axis < 0) { + begin_params_axis += x_shape.size(); + } + for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) { + block_num_ *= x_shape[i]; + } + for (size_t i = IntToSize(begin_norm_axis); i < x_shape.size(); i++) { + block_size_ *= x_shape[i]; + } + for (size_t i = IntToSize(begin_params_axis); i < x_shape.size(); i++) { + param_num_ *= x_shape[i]; + } + if (block_num_ <= 0 || block_size_ <= 0) { + MS_LOG(EXCEPTION) << "LayerNormCPUKernel input shape error, input shape: " << x_shape; + } +} + +bool LayerNormCPUKernel::Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) { + if (dtype_ == kNumberTypeFloat16) { + LaunchKernel(inputs, outputs); + } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) { + LaunchKernel(inputs, outputs); + } else { + MS_LOG(EXCEPTION) << "input dtype only support float16, float32, float64"; + } + return true; +} + +template +void LayerNormCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { + size_t f_size = sizeof(T); + if (inputs[1]->size != f_size * param_num_ || inputs[2]->size != f_size * param_num_) { + MS_LOG(EXCEPTION) << "The product of gamma and beta's shape must be " << param_num_; + } + if (outputs[1]->size != f_size * block_num_ || outputs[2]->size != f_size * block_num_) { + MS_LOG(EXCEPTION) << "The product of mean and var's shape must be " << block_num_; + } + auto x = reinterpret_cast(inputs[0]->addr); + auto gamma = reinterpret_cast(inputs[1]->addr); + auto beta = reinterpret_cast(inputs[2]->addr); + auto y = reinterpret_cast(outputs[0]->addr); + auto mean = reinterpret_cast(outputs[1]->addr); + auto var = reinterpret_cast(outputs[2]->addr); + for (size_t i = 0; i < block_num_; ++i) { + T sum = (T)0.0; + T square_sum = (T)0.0; + for (size_t j = i * block_size_; j < (i + 1) * block_size_; ++j) { + sum += x[j]; + square_sum += x[j] * x[j]; + } + T block_mean = sum / block_size_; + T block_var = square_sum / block_size_ - block_mean * block_mean; + for (size_t j = i * block_size_; j < (i + 1) * block_size_; ++j) { + auto param_shift = j % param_num_; + y[j] = (x[j] - block_mean) / (T)std::sqrt(static_cast(block_var) + eps_) * gamma[param_shift] + + beta[param_shift]; + } + mean[i] = block_mean; + var[i] = block_var; + } +} + +void LayerNormCPUKernel::CheckParam(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 3) { + MS_LOG(EXCEPTION) << "LayerNormCPUKernel needs 3 inputs, but gets " << input_num; + } + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 3) { + MS_LOG(EXCEPTION) << "LayerNormCPUKernel expects 3 output, but gets" << output_num; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h new file mode 100644 index 00000000000..b5786ad76df --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h @@ -0,0 +1,70 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_CPU_KERNEL_H_ +#include +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class LayerNormCPUKernel : public CPUKernel { + public: + LayerNormCPUKernel() = default; + ~LayerNormCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + template + void LaunchKernel(const std::vector &inputs, const std::vector &outputs); + + private: + void CheckParam(const CNodePtr &kernel_node); + TypeId dtype_{kTypeUnknown}; + float eps_{1e-12}; + size_t block_num_{1}; + size_t block_size_{1}; + size_t param_num_{1}; +}; + +MS_REG_CPU_KERNEL(LayerNorm, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + LayerNormCPUKernel); + +MS_REG_CPU_KERNEL(LayerNorm, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LayerNormCPUKernel); +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.cc new file mode 100644 index 00000000000..63cefe0ab92 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.cc @@ -0,0 +1,124 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +void LayerNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { + CheckParam(kernel_node); + dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); + std::vector x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto begin_norm_axis = AnfAlgo::GetNodeAttr(kernel_node, "begin_norm_axis"); + auto begin_params_axis = AnfAlgo::GetNodeAttr(kernel_node, "begin_params_axis"); + if (begin_norm_axis < 0) { + begin_norm_axis += x_shape.size(); + } + if (begin_params_axis < 0) { + begin_params_axis += x_shape.size(); + } + for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) { + block_num_ *= x_shape[i]; + } + for (size_t i = IntToSize(begin_norm_axis); i < x_shape.size(); i++) { + block_size_ *= x_shape[i]; + } + for (size_t i = 0; i < IntToSize(begin_params_axis); i++) { + param_size_ *= x_shape[i]; + } + for (size_t i = begin_params_axis; i < x_shape.size(); i++) { + param_num_ *= x_shape[i]; + } + if (block_num_ <= 0 || block_size_ <= 0) { + MS_LOG(EXCEPTION) << "LayerNormGradCPUKernel input shape error, input shape: " << x_shape; + } +} + +bool LayerNormGradCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs) { + if (dtype_ == kNumberTypeFloat16) { + LaunchKernel(inputs, workspace, outputs); + } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) { + LaunchKernel(inputs, workspace, outputs); + } else { + MS_LOG(EXCEPTION) << "input dtype only support float16, float32, float64"; + } + return true; +} + +template +void LayerNormGradCPUKernel::LaunchKernel(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs) { + auto x = reinterpret_cast(inputs[0]->addr); + auto dy = reinterpret_cast(inputs[1]->addr); + auto var = reinterpret_cast(inputs[2]->addr); + auto mean = reinterpret_cast(inputs[3]->addr); + auto gamma = reinterpret_cast(inputs[4]->addr); + auto dx = reinterpret_cast(outputs[0]->addr); + auto dg = reinterpret_cast(outputs[1]->addr); + auto db = reinterpret_cast(outputs[2]->addr); + + for (size_t i = 0; i < param_num_; ++i) { + T dgamma = (T)0.0; + T dbeta = (T)0.0; + for (size_t j = i; j < param_size_ * param_num_; j += param_num_) { + auto norm_shift = static_cast(j / block_size_); + dgamma += dy[j] * (T)std::pow(static_cast(var[norm_shift]) + eps_, -0.5) * (x[j] - mean[norm_shift]); + dbeta += dy[j]; + } + dg[i] = dgamma; + db[i] = dbeta; + } + for (size_t i = 0; i < block_num_; ++i) { + T sum1 = (T)0.0; + T sum2 = (T)0.0; + T sum3 = (T)0.0; + for (size_t j = i * block_size_; j < (i + 1) * block_size_; ++j) { + auto param_shift = j % param_num_; + auto norm_shift = static_cast(j / block_size_); + auto dxm = x[j] - mean[norm_shift]; + auto dyg = dy[j] * gamma[param_shift]; + sum1 += (T)(-0.5) * dyg * dxm * (T)std::pow(static_cast(var[norm_shift]) + eps_, -1.5); + sum2 += dyg; + sum3 += (T)(-2.0) * dxm; + } + for (size_t j = i * block_size_; j < (i + 1) * block_size_; ++j) { + auto param_shift = j % param_num_; + auto norm_shift = static_cast(j / block_size_); + auto var_sqrt = (T)std::pow(static_cast(var[norm_shift]) + eps_, -0.5); + auto dx1 = dy[j] * gamma[param_shift] * var_sqrt; + auto dx2 = sum1 * (T)2.0 / block_size_ * (x[j] - mean[norm_shift]); + auto dx3 = ((T)(-1.0) * var_sqrt * sum2 + ((T)1.0 / block_size_) * sum1 * sum3) * ((T)1.0 / block_size_); + dx[j] = dx1 + dx2 
+ dx3; + } + } +} + +void LayerNormGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 5) { + MS_LOG(EXCEPTION) << "LayerNormGradCPUKernel needs 5 inputs, but gets " << input_num; + } + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 3) { + MS_LOG(EXCEPTION) << "LayerNormGradCPUKernel expects 3 output, but gets" << output_num; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h new file mode 100644 index 00000000000..afd9a17369a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h @@ -0,0 +1,76 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_GRAD_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_GRAD_CPU_KERNEL_H_ +#include +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class LayerNormGradCPUKernel : public CPUKernel { + public: + LayerNormGradCPUKernel() = default; + ~LayerNormGradCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + template + void LaunchKernel(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs); + + private: + void CheckParam(const CNodePtr &kernel_node); + TypeId dtype_{kTypeUnknown}; + float eps_{1e-12}; + size_t block_num_{1}; + size_t block_size_{1}; + size_t param_num_{1}; + size_t param_size_{1}; +}; + +MS_REG_CPU_KERNEL(LayerNormGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + LayerNormGradCPUKernel); + +MS_REG_CPU_KERNEL(LayerNormGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LayerNormGradCPUKernel); +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_LAYER_NORM_GRAD_CPU_KERNEL_H_ diff --git a/tests/st/ops/cpu/test_gelu_grad_op.py b/tests/st/ops/cpu/test_gelu_grad_op.py new file mode 100644 index 00000000000..e54d80131cf --- /dev/null +++ b/tests/st/ops/cpu/test_gelu_grad_op.py @@ -0,0 +1,63 
@@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import composite as C +from mindspore.ops import operations as P + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + +class GeluNet(nn.Cell): + def __init__(self): + super(GeluNet, self).__init__() + self.gelu = P.Gelu() + + def construct(self, x): + return self.gelu(x) + + +class Grad(nn.Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.grad = C.GradOperation(get_all=True, sens_param=True) + self.network = network + + def construct(self, input_data, sens): + gout = self.grad(self.network)(input_data, sens) + return gout + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_gelugrad(): + x_ms = Tensor(np.array([0.58401114, 0.68800163, 0.9760397, 0.14702141, 0.46563736, 0.9607501, + 0.14567593, 0.12261796, 0.37054458, 0.46421242]).astype(np.float32)) + dy_ms = Tensor(np.array([0.5559598, 0.96994054, 0.24770357, 0.34646875, 0.2984393, 0.03287048, + 0.55681044, 0.966908, 0.06015943, 0.6099489]).astype(np.float32)) + + net = GeluNet() + grad = Grad(net) + + output = grad(x_ms, dy_ms) + expect = [0.50963277, 0.9414753, 0.2667653, 0.21358444, 0.25243032, 0.0352667, + 0.34266686, 0.57757664, 0.04707306, 0.51536125] + assert np.allclose(output[0].asnumpy(), expect) diff --git a/tests/st/ops/cpu/test_gelu_op.py b/tests/st/ops/cpu/test_gelu_op.py new file mode 100644 index 00000000000..3ac9e6f01fa --- /dev/null +++ b/tests/st/ops/cpu/test_gelu_op.py @@ -0,0 +1,93 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + +class GeluNet(nn.Cell): + def __init__(self): + super(GeluNet, self).__init__() + self.gelu = P.Gelu() + + def construct(self, x): + return self.gelu(x) + + +def GeluCompute(x): + return 0.5 * x * (1.0 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x * x * x))) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_gelu_1d(): + x_np = np.random.random((50,)).astype(np.float32) + y_np = GeluCompute(x_np) + + x_ms = Tensor(x_np) + net = GeluNet() + y_ms = net(x_ms) + + assert np.allclose(y_np, y_ms.asnumpy()) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_gelu_2d(): + x_np = np.random.random((50, 40)).astype(np.float32) + y_np = GeluCompute(x_np) + + x_ms = Tensor(x_np) + net = GeluNet() + y_ms = net(x_ms) + + assert np.allclose(y_np, y_ms.asnumpy()) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_gelu_4d(): + x_np = np.random.random((32, 3, 224, 224)).astype(np.float32) + y_np = GeluCompute(x_np) + + x_ms = Tensor(x_np) + net = GeluNet() + y_ms = net(x_ms) + + assert np.allclose(y_np, y_ms.asnumpy()) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_gelu_neg(): + x_np = np.random.random((32, 3, 224, 224)).astype(np.float32) * -1 + y_np = GeluCompute(x_np) + + x_ms = Tensor(x_np) + net = GeluNet() + y_ms = net(x_ms) + + assert np.allclose(y_np, y_ms.asnumpy()) diff --git a/tests/st/ops/cpu/test_layer_norm_grad_op.py b/tests/st/ops/cpu/test_layer_norm_grad_op.py new file mode 100644 index 00000000000..a4ff7e274fb --- /dev/null +++ b/tests/st/ops/cpu/test_layer_norm_grad_op.py @@ -0,0 +1,221 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops.operations import _grad_ops as G + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + +class LayerNormGradNet(nn.Cell): + def __init__(self, begin_norm_axis, begin_params_axis): + super(LayerNormGradNet, self).__init__() + self.norm = G.LayerNormGrad(begin_norm_axis, begin_params_axis) + + def construct(self, dy, x, var, mean, gamma): + return self.norm(dy, x, var, mean, gamma) + + +def LayerNormGradReference(x, dy, gamma, epsilon, begin_norm_axis, begin_params_axis): + begin_norm_axis = begin_norm_axis if begin_norm_axis >= 0 else begin_norm_axis + len(x.shape) + begin_params_axis = begin_params_axis if begin_params_axis >= 0 else begin_params_axis + len(x.shape) + + norm_axis = [i for i in range(begin_norm_axis, len(x.shape))] + param_axis = [i for i in range(0, begin_params_axis)] + num = 1 + for i in range(begin_norm_axis, len(x.shape)): + num *= x.shape[i] + + mean = np.mean(x, axis=tuple(norm_axis), keepdims=True) + var = np.var(x, axis=tuple(norm_axis), keepdims=True) + + gamma = gamma.reshape((*((1,) * begin_params_axis), *x.shape[begin_params_axis:])) + dg = np.sum(dy * np.power(var + epsilon, -0.5) * (x - mean), axis=tuple(param_axis), keepdims=True) + db = np.sum(dy, axis=tuple(param_axis), keepdims=True) + + sum1 = np.sum((-0.5) * dy * gamma * (x - mean) * np.power(var + epsilon, -1.5), axis=tuple(norm_axis), + keepdims=True) + sum2 = np.sum(dy * gamma, axis=tuple(norm_axis), keepdims=True) + sum3 = np.sum(-2.0 * (x - mean), axis=tuple(norm_axis), keepdims=True) + + dx1 = dy * gamma * np.power(var + epsilon, -0.5) + dx2 = sum1 * 2.0 / num * (x - mean) + dx3 = ((-1.0) * np.power(var + epsilon, -0.5) * sum2 + (1.0 / num) * sum1 * sum3) * (1.0 / num) + dx = dx1 + dx2 + dx3 + return dx, dg, db, mean, var + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernormgrad0(): + begin_norm_axis = 1 + begin_params_axis = 1 + x_np = np.random.randn(4096, 3072).astype(np.float32) + dy_np = np.random.randn(4096, 3072).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, + begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) + + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-4, atol=1e-4) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-4, atol=1e-3) + assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-4, atol=1e-3) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernormgrad1(): + begin_norm_axis = 1 + begin_params_axis = 1 + x_np = np.random.randn(640, 768).astype(np.float32) + dy_np = np.random.randn(640, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, + begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = 
Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) + + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-4, atol=1e-4) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-4, atol=1e-3) + assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-4, atol=1e-3) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernormgrad2(): + begin_norm_axis = -1 + begin_params_axis = -1 + x_np = np.random.randn(32, 128, 768).astype(np.float32) + dy_np = np.random.randn(32, 128, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, + begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) + + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-4, atol=1e-4) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-4, atol=1e-3) + assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-4, atol=1e-3) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernormgrad3(): + begin_norm_axis = -1 + begin_params_axis = -1 + x_np = np.random.randn(32, 64).astype(np.float32) + dy_np = np.random.randn(32, 64).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, + begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-4, atol=1e-4) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-4, atol=1e-3) + assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-4, atol=1e-3) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernormgrad4(): + begin_norm_axis = -1 + begin_params_axis = -1 + x_np = np.random.randn(32, 64).astype(np.float32) + dy_np = np.random.randn(32, 64).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, + begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-4, atol=1e-4) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-4, atol=1e-3) + assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-4, atol=1e-3) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernormgrad5(): + begin_norm_axis = 2 + begin_params_axis = 1 + x_np = np.random.randn(128, 2, 16, 32).astype(np.float32) + dy_np = np.random.randn(128, 2, 16, 32).astype(np.float32) 
+ gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + epsilon = 10e-12 + dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(x_np, dy_np, gamma_np, epsilon, begin_norm_axis, + begin_params_axis) + + dy_ms = Tensor(dy_np) + x_ms = Tensor(x_np) + var_ms = Tensor(var_np) + mean_ms = Tensor(mean_np) + gamma_ms = Tensor(gamma_np) + + net = LayerNormGradNet(begin_norm_axis, begin_params_axis) + dx_ms, dg_ms, db_ms = net(x_ms, dy_ms, var_ms, mean_ms, gamma_ms) + assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-4, atol=1e-4) + assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-4, atol=1e-3) + assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-4, atol=1e-3) diff --git a/tests/st/ops/cpu/test_layer_norm_op.py b/tests/st/ops/cpu/test_layer_norm_op.py new file mode 100644 index 00000000000..791446c9c39 --- /dev/null +++ b/tests/st/ops/cpu/test_layer_norm_op.py @@ -0,0 +1,199 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + +class LayerNormNet(nn.Cell): + def __init__(self, begin_norm_axis, begin_params_axis): + super(LayerNormNet, self).__init__() + self.norm = P.LayerNorm(begin_norm_axis, begin_params_axis) + + def construct(self, x, gamma, beta): + return self.norm(x, gamma, beta) + + +def LayerNormReference(begin_norm_axis, begin_params_axis, x, gamma, beta): + begin_norm_axis = begin_norm_axis if begin_norm_axis >= 0 else begin_norm_axis + len(x.shape) + begin_params_axis = begin_params_axis if begin_params_axis >= 0 else begin_params_axis + len(x.shape) + + axis = [i for i in range(begin_norm_axis, len(x.shape))] + mean = np.mean(x, axis=tuple(axis), keepdims=True) + var = np.var(x, axis=tuple(axis), keepdims=True) + + gamma = gamma.reshape((*((1,) * begin_params_axis), *x.shape[begin_params_axis:])) + beta = beta.reshape((*((1,) * begin_params_axis), *x.shape[begin_params_axis:])) + y = np.subtract(x, mean) / np.sqrt(var + 1e-12) * gamma + beta + return y, mean, var + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernorm0(): + begin_norm_axis = 1 + begin_params_axis = 1 + x_np = np.random.randn(4096, 3072).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + + assert np.allclose(y_ms.asnumpy(), y_np, atol=1e-4) + assert 
np.allclose(mean_ms.asnumpy(), mean_np, atol=1e-4) + assert np.allclose(var_ms.asnumpy(), var_np, atol=1e-4) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernorm1(): + begin_norm_axis = 1 + begin_params_axis = 1 + x_np = np.random.randn(640, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-4) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-4) + assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-4) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernorm3d_1(): + begin_norm_axis = -1 + begin_params_axis = -1 + x_np = np.random.randn(32, 128, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-4) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-4) + assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-4) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernorm3d_2(): + begin_norm_axis = -1 + begin_params_axis = 1 + x_np = np.random.randn(32, 128, 768).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-4) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-4) + assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-4) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernorm2d_2(): + begin_norm_axis = -1 + begin_params_axis = 1 + x_np = np.random.randn(64, 32).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-4) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-4) + 
assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-4) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernorm2d_3(): + begin_norm_axis = -1 + begin_params_axis = 1 + x_np = np.random.randn(128, 128).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-4) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-4) + assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-4) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_layernorm2d_4(): + begin_norm_axis = 2 + begin_params_axis = 1 + np.random.seed(42) + x_np = np.random.randn(128, 2, 16, 32).astype(np.float32) + gamma_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + beta_np = np.random.randn(*x_np.shape[begin_params_axis:]).astype(np.float32) + y_np, mean_np, var_np = LayerNormReference(begin_norm_axis, begin_params_axis, x_np, gamma_np, beta_np) + + x_ms = Tensor(x_np) + gamma_ms = Tensor(gamma_np) + beta_ms = Tensor(beta_np) + net = LayerNormNet(begin_norm_axis, begin_params_axis) + y_ms, mean_ms, var_ms = net(x_ms, gamma_ms, beta_ms) + assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-4) + assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-4) + assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-4) From 08e00b20a9ca919b9d2a14206892ac90f965bddf Mon Sep 17 00:00:00 2001 From: yujianfeng Date: Sat, 16 Jan 2021 10:20:20 +0800 Subject: [PATCH 06/10] Move the whole graph nodes firstly for the single used graph when inline --- .../ccsrc/frontend/optimizer/irpass/inline.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/inline.h b/mindspore/ccsrc/frontend/optimizer/irpass/inline.h index 3a4277c61fc..1362818c93f 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/inline.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/inline.h @@ -141,15 +141,6 @@ class InlinerBase : public AnfVisitor { } if (IsUniqueUse(nullptr, fg, nullptr)) { - // The other branch calling the last after block. - if (fg->has_flag(FUNC_GRAPH_FLAG_AFTER_BLOCK)) { - // Check if parameters' changed. - auto param_simplified_caller = SimplifyAfterParameter(fg, node, args); - if (param_simplified_caller != nullptr) { - return param_simplified_caller; - } - } - // For the single used fg, including non-after and after not matched above, // we move the whole fg nodes. if (use_move_) { @@ -160,6 +151,15 @@ class InlinerBase : public AnfVisitor { mng->MoveAllCNodeDropGraph(fg, node->func_graph(), inputs[0]->scope()); return out_node; } + + // The other branch calling the last after block. + if (fg->has_flag(FUNC_GRAPH_FLAG_AFTER_BLOCK)) { + // Check if parameters' changed. + auto param_simplified_caller = SimplifyAfterParameter(fg, node, args); + if (param_simplified_caller != nullptr) { + return param_simplified_caller; + } + } } else { // We don't expand the middle multiple used after block, except the last one. 
if (GraphHasBranch(fg)) { From ce85c7a468f7884fb05a719c37b5a66ea6d9284b Mon Sep 17 00:00:00 2001 From: jjfeing Date: Sat, 16 Jan 2021 15:27:05 +0800 Subject: [PATCH 07/10] fix ub fusion used num --- .../buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc index 42540b40019..77c5bc11959 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc @@ -53,6 +53,9 @@ void BnupdateEltwiseEltwiseFusionPass::MatchBnupdateAddRelu(const CNodePtr &cnod auto add = relu_input->cast(); MS_EXCEPTION_IF_NULL(add); auto tuple_getitem = add->input(1); + std::vector add_output_used_num; + add_output_used_num.emplace_back(SizeToLong(manager->node_users()[add].size())); + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(add_output_used_num), add); MS_EXCEPTION_IF_NULL(tuple_getitem); if (tuple_getitem->isa() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { auto getitem = tuple_getitem->cast(); From 87ecd44f67822c4fdddf7c866caf23334a87206a Mon Sep 17 00:00:00 2001 From: Jiaqi Date: Tue, 12 Jan 2021 17:24:44 +0800 Subject: [PATCH 08/10] develop dice loss --- mindspore/nn/loss/__init__.py | 4 +- mindspore/nn/loss/loss.py | 61 ++++++++++++++++++++++ mindspore/nn/metrics/dice.py | 4 +- mindspore/nn/metrics/hausdorff_distance.py | 6 +-- model_zoo/official/cv/unet/eval.py | 2 + tests/ut/python/nn/test_loss.py | 22 +++++++- 6 files changed, 91 insertions(+), 8 deletions(-) diff --git a/mindspore/nn/loss/__init__.py b/mindspore/nn/loss/__init__.py index 873aa0d7f66..fe3b9d983f5 100644 --- a/mindspore/nn/loss/__init__.py +++ b/mindspore/nn/loss/__init__.py @@ -21,8 +21,8 @@ It shows how well the model works on a dataset and the optimization target which from .loss import L1Loss, MSELoss, SmoothL1Loss, \ SoftmaxCrossEntropyWithLogits, BCELoss, CosineEmbeddingLoss, \ - SampledSoftmaxLoss + SampledSoftmaxLoss, DiceLoss __all__ = ['L1Loss', 'MSELoss', 'SmoothL1Loss', 'SoftmaxCrossEntropyWithLogits', 'BCELoss', - 'CosineEmbeddingLoss', 'SampledSoftmaxLoss'] + 'CosineEmbeddingLoss', 'SampledSoftmaxLoss', 'DiceLoss'] diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py index f1c0c0a7401..03406f6cc43 100644 --- a/mindspore/nn/loss/loss.py +++ b/mindspore/nn/loss/loss.py @@ -297,6 +297,67 @@ def _check_label_dtype(labels_dtype, cls_name): validator.check_type_name("labels", labels_dtype, [mstype.int32, mstype.int64], cls_name) +class DiceLoss(_Loss): + r""" + The Dice coefficient is a set similarity loss. It is used to calculate the similarity between two samples. The + value of the Dice coefficient is 1 when the segmentation result is the best and 0 when the segmentation result + is the worst. The Dice coefficient indicates the ratio of the area between two objects to the total area. + The function is shown as follows: + + .. math:: + dice = 1 - \frac{2 * (pred \bigcap true)}{pred \bigcup true} + + Args: + smooth (float): A term added to the denominator to improve numerical stability. Should be greater than 0. + Default: 1e-5. + threshold (float): A threshold, which is used to compare with the input tensor. Default: 0.5. + + Inputs: + - **y_pred** (Tensor) - Tensor of shape (N, C). 
+ - **y** (Tensor) - Tensor of shape (N, C). + + Outputs: + Tensor, a tensor of shape with the per-example sampled Dice losses. + + Supported Platforms: + ``Ascend`` + + Examples: + >>> loss = nn.Diceloss(smooth=1e-5, threshold=0.5) + >>> y_pred = Tensor(np.array([[0.2, 0.5], [0.3, 0.1], [0.9, 0.6]]), mstype.float32) + >>> y = Tensor(np.array([[0, 1], [1, 0], [0, 1]]), mstype.float32) + >>> output = loss(y_pred, y) + >>> print(output) + [0.77777076] + """ + def __init__(self, smooth=1e-5, threshold=0.5): + super(DiceLoss, self).__init__() + self.smooth = validator.check_positive_float(smooth, "smooth") + self.threshold = validator.check_value_type("threshold", threshold, [float]) + self.reshape = P.Reshape() + + def construct(self, logits, label): + _check_shape(logits.shape, label.shape) + logits = self.cast((logits > self.threshold), mstype.float32) + label = self.cast(label, mstype.float32) + dim = label.shape + pred_flat = self.reshape(logits, (dim[0], -1)) + true_flat = self.reshape(label, (dim[0], -1)) + + intersection = self.reduce_sum((pred_flat * true_flat), 1) + unionset = self.reduce_sum(pred_flat, 1) + self.reduce_sum(true_flat, 1) + + dice = (2 * intersection + self.smooth) / (unionset + self.smooth) + dice_loss = 1 - self.reduce_sum(dice) / dim[0] + + return dice_loss + + +@constexpr +def _check_shape(logits_shape, label_shape): + validator.check('logits_shape', logits_shape, 'label_shape', label_shape) + + class SampledSoftmaxLoss(_Loss): r""" Computes the sampled softmax training loss. diff --git a/mindspore/nn/metrics/dice.py b/mindspore/nn/metrics/dice.py index a4110c1e7ac..56524c7d0fd 100644 --- a/mindspore/nn/metrics/dice.py +++ b/mindspore/nn/metrics/dice.py @@ -26,7 +26,7 @@ class Dice(Metric): The function is shown as follows: .. math:: - \text{dice} = \frac{2 * (\text{pred} \bigcap \text{true})}{\text{pred} \bigcup \text{true}} + dice = \frac{2 * (pred \bigcap true)}{pred \bigcup true} Args: smooth (float): A term added to the denominator to improve numerical stability. Should be greater than 0. @@ -58,7 +58,7 @@ class Dice(Metric): def update(self, *inputs): """ - Updates the internal evaluation result :math:`y_{pred}` and :math:`y`. + Updates the internal evaluation result :math:`y_pred` and :math:`y`. Args: inputs: Input `y_pred` and `y`. `y_pred` and `y` are Tensor, list or numpy.ndarray. `y_pred` is the diff --git a/mindspore/nn/metrics/hausdorff_distance.py b/mindspore/nn/metrics/hausdorff_distance.py index 9731a0b3ff4..d9354870111 100644 --- a/mindspore/nn/metrics/hausdorff_distance.py +++ b/mindspore/nn/metrics/hausdorff_distance.py @@ -70,9 +70,9 @@ class HausdorffDistance(Metric): Given two feature sets A and B, the Hausdorff distance between two point sets A and B is defined as follows: .. 
math:: - \text{H}(A, B) = \text{max}[\text{h}(A, B), \text{h}(B, A)] - \text{h}(A, B) = \underset{a \in A}{\text{max}}\{\underset{b \in B}{\text{min}} \rVert a - b \rVert \} - \text{h}(A, B) = \underset{b \in B}{\text{max}}\{\underset{a \in A}{\text{min}} \rVert b - a \rVert \} + H(A, B) = \text{max}[h(A, B), h(B, A)] + h(A, B) = \underset{a \in A}{\text{max}}\{\underset{b \in B}{\text{min}} \rVert a - b \rVert \} + h(A, B) = \underset{b \in B}{\text{max}}\{\underset{a \in A}{\text{min}} \rVert b - a \rVert \} Args: distance_metric (string): The parameter of calculating Hausdorff distance supports three measurement methods, diff --git a/model_zoo/official/cv/unet/eval.py b/model_zoo/official/cv/unet/eval.py index c4d1373676d..6b8e746c6ea 100644 --- a/model_zoo/official/cv/unet/eval.py +++ b/model_zoo/official/cv/unet/eval.py @@ -85,6 +85,7 @@ class dice_coeff(nn.Metric): raise RuntimeError('Total samples num must not be 0.') return self._dice_coeff_sum / float(self._samples_num) + def test_net(data_dir, ckpt_path, cross_valid_ind=1, @@ -102,6 +103,7 @@ def test_net(data_dir, dice_score = model.eval(valid_dataset, dataset_sink_mode=False) print("============== Cross valid dice coeff is:", dice_score) + def get_args(): parser = argparse.ArgumentParser(description='Test the UNet on images and target masks', formatter_class=argparse.ArgumentDefaultsHelpFormatter) diff --git a/tests/ut/python/nn/test_loss.py b/tests/ut/python/nn/test_loss.py index f4d97ef1acc..a7c6e524220 100644 --- a/tests/ut/python/nn/test_loss.py +++ b/tests/ut/python/nn/test_loss.py @@ -14,7 +14,8 @@ # ============================================================================ """ test loss """ import numpy as np - +import pytest +import mindspore.common.dtype as mstype import mindspore.nn as nn from mindspore import Tensor from ..ut_filter import non_graph_engine @@ -88,3 +89,22 @@ def test_cosine_embedding_loss(): x2 = Tensor(np.array([[0.4, 1.2], [-0.4, -0.9]]).astype(np.float32)) label = Tensor(np.array([1, -1]).astype(np.int32)) loss(x1, x2, label) + + +def test_dice_loss(): + """ test_dice_loss """ + loss = nn.DiceLoss() + y_pred = Tensor(np.array([[0.2, 0.5], [0.3, 0.1], [0.9, 0.6]]), mstype.float32) + y = Tensor(np.array([[0, 1], [1, 0], [0, 1]]), mstype.float32) + # Pass the test if no error is reported + loss(y_pred, y).asnumpy() + + + +def test_dice_loss_check_shape(): + """ test_dice_loss """ + loss = nn.DiceLoss() + y_pred = Tensor(np.array([[0.2, 0.5], [0.3, 0.1], [0.9, 0.6]]), mstype.float32) + y = Tensor(np.array([[1, 0], [0, 1]]), mstype.float32) + with pytest.raises(ValueError): + loss(y_pred, y) From e1e10981d0b6fb946d86bc553a7337c4da12ea9f Mon Sep 17 00:00:00 2001 From: wandongdong Date: Fri, 15 Jan 2021 20:50:30 -0800 Subject: [PATCH 09/10] fix depthwise CreateImage2d bug --- .../runtime/kernel/opencl/kernel/argminmax.cc | 21 +-- .../kernel/opencl/kernel/depthwise_conv2d.cc | 112 ++++++++------- .../src/runtime/opencl/opencl_allocator.cc | 12 +- .../runtime/kernel/opencl/argminmax_tests.cc | 15 ++ .../kernel/opencl/depthwise_conv2d_tests.cc | 135 ++++++++++++++++-- 5 files changed, 223 insertions(+), 72 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc index a6867fe12b3..5e8e0a95dec 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc @@ -48,13 +48,12 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() { return RET_ERROR; } 
auto *param = reinterpret_cast(this->op_parameter_); - param->dims_size_ = in_tensors_[0]->shape().size(); - param->axis_ = (param->axis_ + param->dims_size_) % param->dims_size_; - if (param->axis_ < 0 || param->axis_ >= param->dims_size_) { - MS_LOG(ERROR) << "Invalid axis " << param->axis_; + auto dims_size = in_tensors_[0]->shape().size(); + auto axis = (param->axis_ + dims_size) % dims_size; + if (axis < 0 || axis >= dims_size) { + MS_LOG(ERROR) << "Invalid axis " << axis; return RET_ERROR; } - param->get_max_ = (Type() == PrimitiveType_ArgMax); return RET_OK; } @@ -77,10 +76,10 @@ void ArgMinMaxOpenCLKernel::SetConstArgs() { void ArgMinMaxOpenCLKernel::SetGlobalLocal() { auto param = reinterpret_cast(op_parameter_); - auto in_shape = in_tensors_[0]->shape(); + im_in_ = GpuTensorInfo(in_tensors_[0]); + std::vector in_shape = {im_in_.N, im_in_.H, im_in_.W, im_in_.C}; auto in_shape_align = in_shape; in_shape_align[3] = UP_ROUND(in_shape[3], C4NUM); - im_in_ = GpuTensorInfo(in_tensors_[0]); auto out_shape_align = in_shape_align; out_shape_align.at(param->axis_) = param->axis_ == 3 ? UP_ROUND(param->topk_, C4NUM) : param->topk_; int reduce_len = GetUpPow2(in_shape.at(param->axis_)); @@ -92,7 +91,7 @@ void ArgMinMaxOpenCLKernel::SetGlobalLocal() { src_size_ = {std::accumulate(in_shape.begin() + param->axis_ + 1, in_shape.end(), 1, std::multiplies()), std::accumulate(in_shape.begin(), in_shape.begin() + param->axis_, 1, std::multiplies()), std::accumulate(in_shape.begin() + param->axis_, in_shape.end(), 1, std::multiplies()), - in_shape.at(param->axis_)}; + static_cast(in_shape.at(param->axis_))}; strides_ = { std::accumulate(in_shape_align.begin() + param->axis_ + 1, in_shape_align.end(), 1, std::multiplies()), std::accumulate(in_shape_align.begin() + param->axis_, in_shape_align.end(), 1, std::multiplies()), @@ -145,6 +144,12 @@ int ArgMinMaxOpenCLKernel::Prepare() { ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif + auto *param = reinterpret_cast(this->op_parameter_); + param->dims_size_ = in_tensors_[0]->shape().size(); + param->axis_ = (param->axis_ + param->dims_size_) % param->dims_size_; + param->axis_ = (4 - param->dims_size_) + param->axis_; + param->get_max_ = (Type() == PrimitiveType_ArgMax); + InitWeights(); SetGlobalLocal(); SetConstArgs(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index d5646d93c8f..373963494d8 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -118,67 +118,77 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() { int alignment = ocl_runtime_->GetImagePitchAlignment(); plane_out = UP_ROUND(plane_out, alignment) * C4NUM; pack_weight_size = plane_out * CO4; - auto shape = in_tensors_[1]->shape(); size_t img_dtype = ocl_runtime_->GetFp16Enable() ? 
CL_HALF_FLOAT : CL_FLOAT; - img_size = {(size_t)plane_out / C4NUM, (size_t)shape[0] * CO4, img_dtype}; + img_size = {(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype}; } - if (is_fp16) { - packed_weight_ = allocator->Malloc(pack_weight_size * sizeof(int16_t), img_size); - packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true); - if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) { - std::function to_dtype = [](int16_t x) -> int16_t { return x; }; - PackNCHWToNC4HW4(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype); - } else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) { - std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; - PackNCHWToNC4HW4(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype); - } else { // int8 or int16 - std::function to_dtype = [](int16_t x) -> int16_t { return x; }; - PackNCHWToNC4HW4(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype); + pack_weight_size = is_fp16 ? pack_weight_size * sizeof(int16_t) : pack_weight_size * sizeof(float); + auto ConvertFilter = [](void *src, void *dst, TypeId src_type, TypeId dst_type, size_t plane_in, size_t plane_out, + size_t channel) { + if (dst_type == kNumberTypeFloat16) { + if (src_type == kNumberTypeFloat16) { + std::function to_dtype = [](int16_t x) -> int16_t { return x; }; + PackNCHWToNC4HW4(src, dst, 1, plane_in, plane_out, channel, to_dtype); + } else if (src_type == kNumberTypeFloat32) { + std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; + PackNCHWToNC4HW4(src, dst, 1, plane_in, plane_out, channel, to_dtype); + } else { // int8 or int16 + std::function to_dtype = [](int16_t x) -> int16_t { return x; }; + PackNCHWToNC4HW4(src, dst, 1, plane_in, plane_out, channel, to_dtype); + } + } else { + if (src_type == kNumberTypeFloat32) { + std::function to_dtype = [](float x) -> float { return x; }; + PackNCHWToNC4HW4(src, dst, 1, plane_in, plane_out, channel, to_dtype); + } else if (src_type == kNumberTypeFloat16) { + std::function to_dtype = [](float16_t x) -> float { return static_cast(x); }; + PackNCHWToNC4HW4(src, dst, 1, plane_in, plane_out, channel, to_dtype); + } else { // int8 or int16 + std::function to_dtype = [](float x) -> float { return x; }; + PackNCHWToNC4HW4(src, dst, 1, plane_in, plane_out, channel, to_dtype); + } } - } else { - packed_weight_ = allocator->Malloc(pack_weight_size * sizeof(float), img_size); - packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true); - if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) { - std::function to_dtype = [](float x) -> float { return x; }; - PackNCHWToNC4HW4(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype); - } else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) { - std::function to_dtype = [](float16_t x) -> float { return static_cast(x); }; - PackNCHWToNC4HW4(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype); - } else { // int8 or int16 - std::function to_dtype = [](float x) -> float { return x; }; - PackNCHWToNC4HW4(origin_weight, packed_weight_, 1, plane_in, plane_out, out_info.C, to_dtype); - } - } - allocator->UnmapBuffer(packed_weight_); + }; + std::vector temp_filter(pack_weight_size); + auto src_type = in_tensors_.at(kWeightIndex)->data_type(); + auto dst_type = is_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32; + ConvertFilter(origin_weight, temp_filter.data(), src_type, dst_type, plane_in, plane_out, out_info.C); + packed_weight_ = allocator->Malloc(pack_weight_size, img_size, temp_filter.data()); FreeDequantedWeight(); + if (packed_weight_ == nullptr) { + return RET_ERROR; + } + auto ConvertBias = [](void *src, void *dst, size_t size, size_t dtype_size, TypeId src_type, TypeId dst_type) { + if (dst_type == kNumberTypeFloat16 && src_type == kNumberTypeFloat32) { + float16_t *bias_ptr = static_cast(dst); + for (size_t i = 0; i < size; ++i) { + bias_ptr[i] = static_cast(static_cast(src)[i]); + } + } else if (dst_type == kNumberTypeFloat32 && src_type == kNumberTypeFloat16) { + float32_t *bias_ptr = static_cast(dst); + for (size_t i = 0; i < size; ++i) { + bias_ptr[i] = static_cast(static_cast(src)[i]); + } + } else { + memcpy(dst, src, size * dtype_size); + } + }; size_t dtype_size = sizeof(float); if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) { dtype_size = sizeof(int16_t); } - bias_data_ = allocator->Malloc(C4NUM * CO4 * dtype_size); - bias_data_ = allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true); - size_t up_co_size = C4NUM * CO4 * dtype_size; - memset(bias_data_, 0, up_co_size); - if (in_tensors_.size() == kInputSize2) { - auto ori_bias = in_tensors_.at(kBiasIndex)->data_c(); - if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) { - float16_t *bias_ptr = static_cast(bias_data_); - for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) { - bias_ptr[i] = static_cast(static_cast(ori_bias)[i]); - } - } else if (!is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) { - float32_t *bias_ptr = static_cast(bias_data_); - for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) { - bias_ptr[i] = static_cast(static_cast(ori_bias)[i]); - } - } else { - memcpy(bias_data_, ori_bias, out_info.C * dtype_size); - } - } else { - MS_ASSERT(in_tensors_.size() == kInputSize1); + std::vector temp_bias(pack_weight_size, 0); + if (in_tensors_.size() == 3) { + src_type = in_tensors_.at(kBiasIndex)->data_type(); + dst_type = is_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32; + auto element_size = in_tensors_.at(kBiasIndex)->ElementsNum(); + ConvertBias(in_tensors_.at(kBiasIndex)->data_c(), temp_bias.data(), element_size, dtype_size, src_type, dst_type); + } + size_t bias_size = C4NUM * CO4 * dtype_size; + bias_data_ = allocator->Malloc(bias_size, {}, temp_bias.data()); + if (bias_data_ == nullptr) { + return RET_ERROR; } - allocator->UnmapBuffer(bias_data_); return mindspore::lite::RET_OK; } void DepthwiseConv2dOpenCLKernel::SetConstArgs() { diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc index 1f1242f7938..660be24f9d5 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc @@ -107,14 +107,20 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector &img } if (*image == nullptr) { delete *buffer; - MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << ret << ")"; + MS_LOG(ERROR) << "Create OpenCL Image2D failed! 
(ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; + return nullptr; + } + if (ret != CL_SUCCESS) { + delete *buffer; + delete *image; + MS_LOG(ERROR) << "Create OpenCL Image2D (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; return nullptr; } MS_LOG(DEBUG) << "Malloc a new Image2D, width=" << img_size[0] << ", height=" << img_size[1]; void *host_ptr = nullptr; if (is_map) { std::vector region{img_size[0], img_size[1], 1}; - host_ptr = ocl_runtime_->MapBuffer(**image, 0, CL_MAP_READ | CL_MAP_WRITE, region); + host_ptr = ocl_runtime_->MapBuffer(**image, true, CL_MAP_READ | CL_MAP_WRITE, region); if (host_ptr == nullptr) { delete *buffer; delete *image; @@ -340,7 +346,7 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, std::vector region{mem_buf->img_size[0], mem_buf->img_size[1], 1}; cl::Image2D *image = static_cast(mem_buf->image_ptr_); MS_ASSERT(image); - new_host_ptr = ocl_runtime_->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region); + new_host_ptr = ocl_runtime_->MapBuffer(*image, sync, CL_MAP_READ | CL_MAP_WRITE, region); } if (new_host_ptr == nullptr) { UnLock(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc index 9ed0cd3d79d..126c115d131 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc @@ -185,4 +185,19 @@ TEST_F(TestOpenCL_ArgMinMax, axis3topk2value) { TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } } +TEST_F(TestOpenCL_ArgMinMax, axis1topk1index) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 1; + int topk = 1; + bool out_value = false; + std::vector input_shape = {1, 2, 14}; + std::vector output_shape = {1, 14}; + float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, + 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25}; + float output_data[] = {1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable, 1e-1, 1e-1, true); + } +} } // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc index 4669cf05175..b9d1344b332 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc @@ -58,22 +58,24 @@ TEST_F(TestOpenCL_DepthwiseConv2d, NoPad) { std::vector output_shape = {1, 2, 2, 4}; std::vector weight_shape = {1, kernel_h, kernel_w, output_shape.back()}; std::vector bias_shape = {output_shape.back()}; - float input_data[] = {0.5488135, 0.0202184, 0.45615032, 0.31542835, 0.71518934, 0.83261985, 0.56843394, 0.36371076, - 0.60276335, 0.77815676, 0.0187898, 0.57019675, 0.5448832, 0.87001216, 0.6176355, 0.43860152, - 0.4236548, 0.9786183, 0.6120957, 0.9883738, 0.6458941, 0.7991586, 0.616934, 0.10204481, - 0.4375872, 0.46147937, 0.94374806, 0.20887676, 0.891773, 0.7805292, 0.6818203, 0.16130951, - 0.96366274, 0.11827443, 0.3595079, 0.6531083, 0.3834415, 0.639921, 0.43703195, 0.2532916, - 0.79172504, 0.14335328, 0.6976312, 0.46631077, 0.5288949, 0.9446689, 0.06022547, 0.2444256, - 
0.56804454, 0.5218483, 0.6667667, 0.15896958, 0.92559665, 0.41466194, 0.67063785, 0.11037514, - 0.07103606, 0.2645556, 0.21038257, 0.6563296, 0.0871293, 0.7742337, 0.12892629, 0.13818295}; + float input_data[] = { + 0.5488135, 0.71518934, 0.60276335, 0.5448832, 0.4236548, 0.6458941, 0.4375872, 0.891773, + 0.96366274, 0.3834415, 0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606, 0.0871293, + 0.0202184, 0.83261985, 0.77815676, 0.87001216, 0.9786183, 0.7991586, 0.46147937, 0.7805292, + 0.11827443, 0.639921, 0.14335328, 0.9446689, 0.5218483, 0.41466194, 0.2645556, 0.7742337, + 0.45615032, 0.56843394, 0.0187898, 0.6176355, 0.6120957, 0.616934, 0.94374806, 0.6818203, + 0.3595079, 0.43703195, 0.6976312, 0.06022547, 0.6667667, 0.67063785, 0.21038257, 0.12892629, + 0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, 0.10204481, 0.20887676, 0.16130951, + 0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, 0.11037514, 0.6563296, 0.13818295, + }; float bias_data[] = {0, 0, 0, 0}; float weight_data[] = {0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, 0.09609841, 0.97645944, 0.4686512, 0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772, 0.31798318, 0.41426298, 0.06414749, 0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051, 0.5759465, 0.9292962, 0.31856894, 0.6674104, 0.13179787, 0.7163272, 0.2894061, 0.18319136, 0.5865129, 0.02010755, 0.82894003, 0.00469548}; - float output_data[] = {3.3848767, 1.4446403, 1.8428744, 1.3194335, 2.5873442, 2.1384869, 2.04022, 1.1872686, - 2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988}; + float output_data[] = {2.9720426, 1.890834, 2.3618119, 2.3867798, 2.5666943, 1.6261611, 2.0977764, 1.6445805, + 2.462798, 1.6643658, 1.6861027, 1.8428761, 2.5156446, 1.5366757, 1.6767557, 1.6905226}; for (auto fp16_enable : {false, true}) { auto *param = CreateParameter(kernel_h, kernel_w, stride_h, stride_w, pad_u, pad_d, pad_l, pad_r, dilation_h, @@ -132,4 +134,117 @@ TEST_F(TestOpenCL_DepthwiseConv2d, Pad) { } } +TEST_F(TestOpenCL_DepthwiseConv2d, NoPad1) { + int kernel_h = 2; + int kernel_w = 2; + int stride_h = 1; + int stride_w = 1; + int pad_u = 0; + int pad_d = 0; + int pad_l = 0; + int pad_r = 0; + int dilation_h = 1; + int dilation_w = 1; + ActType act_type = ActType_No; + + std::vector input_shape = {1, 4, 4, 4}; + std::vector output_shape = {1, 3, 3, 4}; + std::vector weight_shape = {1, kernel_h, kernel_w, output_shape.back()}; + std::vector bias_shape = {output_shape.back()}; + float input_data[] = {0.5488135, 0.71518934, 0.60276335, 0.5448832, 0.4236548, 0.6458941, 0.4375872, 0.891773, + 0.96366274, 0.3834415, 0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606, 0.0871293, + 0.0202184, 0.83261985, 0.77815676, 0.87001216, 0.9786183, 0.7991586, 0.46147937, 0.7805292, + 0.11827443, 0.639921, 0.14335328, 0.9446689, 0.5218483, 0.41466194, 0.2645556, 0.7742337, + 0.45615032, 0.56843394, 0.0187898, 0.6176355, 0.6120957, 0.616934, 0.94374806, 0.6818203, + 0.3595079, 0.43703195, 0.6976312, 0.06022547, 0.6667667, 0.67063785, 0.21038257, 0.12892629, + 0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, 0.10204481, 0.20887676, 0.16130951, + 0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, 0.11037514, 0.6563296, 0.13818295}; + float bias_data[] = {0, 0, 0, 0}; + float weight_data[] = {0.19658236, 0.36872517, 0.82099323, 0.09710128, 0.83794491, 0.09609841, + 0.97645947, 0.4686512, 0.97676109, 0.60484552, 0.73926358, 0.03918779, + 0.28280696, 0.12019656, 
0.2961402, 0.11872772}; + float output_data[] = {0.3757235, 1.8489048, 1.4467758, 0.6116009, 1.2535334, 1.6583176, 1.2530621, 0.6590755, + 0.5466661, 1.22944, 0.93263525, 0.5317252, 0.7987474, 1.618667, 1.090071, 0.60372007, + 0.773425, 1.5383728, 1.262479, 0.54334986, 0.5755667, 1.3171062, 0.82401496, 0.39336145, + 0.6703031, 0.9385749, 1.018886, 0.40566355, 1.1277528, 0.7773028, 1.5164642, 0.27685273, + 0.86816025, 0.72971237, 1.1791146, 0.12131907}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(kernel_h, kernel_w, stride_h, stride_w, pad_u, pad_d, pad_l, pad_r, dilation_h, + dilation_w, act_type, input_shape.back()); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, weight_data, CONST_TENSOR}, + {bias_shape, bias_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-2 : 1e-5, 1e-1, true); + } +} +TEST_F(TestOpenCL_DepthwiseConv2d, Pad1) { + int kernel_h = 3; + int kernel_w = 3; + int stride_h = 1; + int stride_w = 1; + int pad_u = 1; + int pad_d = 1; + int pad_l = 1; + int pad_r = 1; + int dilation_h = 1; + int dilation_w = 1; + ActType act_type = ActType_No; + + std::vector input_shape = {1, 5, 5, 6}; + std::vector output_shape = {1, 5, 5, 6}; + std::vector weight_shape = {1, kernel_h, kernel_w, output_shape.back()}; + std::vector bias_shape = {output_shape.back()}; + float input_data[] = { + 0.5488135, 0.71518934, 0.60276335, 0.5448832, 0.4236548, 0.6458941, 0.4375872, 0.891773, 0.96366274, + 0.3834415, 0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606, 0.0871293, 0.0202184, 0.83261985, + 0.77815676, 0.87001216, 0.9786183, 0.7991586, 0.46147937, 0.7805292, 0.11827443, 0.639921, 0.14335328, + 0.9446689, 0.5218483, 0.41466194, 0.2645556, 0.7742337, 0.45615032, 0.56843394, 0.0187898, 0.6176355, + 0.6120957, 0.616934, 0.94374806, 0.6818203, 0.3595079, 0.43703195, 0.6976312, 0.06022547, 0.6667667, + 0.67063785, 0.21038257, 0.12892629, 0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, 0.10204481, + 0.20887676, 0.16130951, 0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, 0.11037514, 0.6563296, + 0.13818295, 0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, 0.09609841, 0.97645944, 0.4686512, + 0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772, 0.31798318, + 0.41426298, 0.06414749, 0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051, 0.5759465, 0.9292962, + 0.31856894, 0.6674104, 0.13179787, 0.7163272, 0.2894061, 0.18319136, 0.5865129, 0.02010755, 0.82894003, + 0.00469548, 0.6778165, 0.27000797, 0.735194, 0.96218854, 0.24875315, 0.57615733, 0.5920419, 0.5722519, + 0.22308163, 0.952749, 0.44712538, 0.84640867, 0.6994793, 0.29743695, 0.81379783, 0.39650574, 0.8811032, + 0.5812729, 0.8817354, 0.6925316, 0.7252543, 0.50132436, 0.95608366, 0.6439902, 0.42385504, 0.6063932, + 0.0191932, 0.30157483, 0.66017354, 0.2900776, 0.6180154, 0.4287687, 0.13547407, 0.29828233, 0.5699649, + 0.59087276, 0.57432526, 0.6532008, 0.65210325, 0.43141845, 0.8965466, 0.36756188, 0.43586493, 0.89192337, + 0.806194, 0.7038886, 0.10022689, 0.9194826, 0.7142413, 0.998847}; + float weight_data[] = {0.1494483, 0.86812606, 0.16249293, 0.61555956, 0.12381998, 0.84800823, 0.80731896, 0.56910074, + 0.4071833, 0.069167, 0.69742877, 0.45354268, 0.7220556, 0.86638233, 0.97552151, 0.85580334, + 0.01171408, 0.35997806, 0.72999056, 0.17162968, 0.52103661, 0.05433799, 0.19999652, 0.01852179, + 0.7936977, 0.22392469, 0.34535168, 0.92808129, 0.7044144, 0.03183893, 0.16469416, 
0.6214784, + 0.57722859, 0.23789282, 0.934214, 0.61396596, 0.5356328, 0.58990998, 0.73012203, 0.311945, + 0.39822106, 0.20984375, 0.18619301, 0.94437239, 0.7395508, 0.49045881, 0.22741463, 0.25435648, + 0.05802916, 0.43441663, 0.31179588, 0.69634349, 0.37775184, 0.17960368}; + float bias_data[] = {0, 0, 0, 0, 0, 0}; + float output_data[] = { + 0.8388255, 1.7207233, 0.56646764, 1.50962, 0.6184657, 0.7572999, 1.7197044, 2.8834608, 1.0304408, 1.5622743, + 0.95027775, 1.1451806, 2.0191956, 2.9541533, 1.1799709, 1.6366025, 1.3484346, 1.0071151, 1.3740869, 2.1602216, + 1.0846798, 1.7810996, 1.6170096, 0.6889053, 0.8671698, 1.4957678, 0.68065727, 1.0596768, 0.9761665, 0.38881996, + 1.524128, 2.2121127, 1.1506181, 1.330961, 1.8186853, 0.9094476, 2.3777275, 2.5568333, 1.8321692, 1.8297466, + 2.069798, 1.3701197, 2.7548862, 2.0871775, 2.3611763, 1.5387508, 1.6725919, 1.2565864, 2.6130712, 2.0915375, + 1.2955335, 1.6571269, 1.7603228, 1.3315495, 1.0005323, 1.0135669, 1.2701392, 1.8230836, 1.6048919, 1.4224635, + 1.4651375, 1.0251865, 1.0325887, 1.2355556, 1.3313429, 0.6756204, 2.602416, 2.1827717, 1.4354478, 1.6628273, + 2.0171032, 1.0299077, 2.6085434, 1.3310422, 2.1677747, 2.457499, 2.6715999, 1.0225507, 2.5822947, 2.1068158, + 1.6401942, 2.5422354, 2.6937182, 1.3813802, 1.1241511, 1.273326, 1.2024405, 1.4564767, 2.016776, 1.0182433, + 1.228782, 0.83329916, 1.033041, 1.3280122, 1.9437144, 0.6729013, 2.438968, 2.3275855, 2.289177, 1.4376242, + 2.4595368, 1.325891, 2.018128, 2.676854, 1.9685578, 1.8240746, 2.3104675, 1.4958379, 2.474168, 2.6657124, + 1.6738743, 2.336092, 2.3048637, 1.802324, 1.7594845, 1.6022205, 1.2564734, 1.8977238, 1.6991055, 1.8674731, + 0.47793916, 1.2031221, 0.6579696, 1.0724078, 0.96408695, 0.5074543, 1.2399375, 1.410824, 0.56263226, 1.3138686, + 1.4859737, 0.7219256, 1.3437214, 2.0015993, 1.0472497, 1.064316, 1.7359762, 0.9249617, 1.2835678, 2.1866667, + 0.92954785, 2.005947, 1.8761289, 1.2612648, 1.2410495, 1.263778, 0.54638237, 1.8269669, 1.3152003, 0.7890457}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(kernel_h, kernel_w, stride_h, stride_w, pad_u, pad_d, pad_l, pad_r, dilation_h, + dilation_w, act_type, input_shape.back()); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, weight_data, CONST_TENSOR}, + {bias_shape, bias_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable, fp16_enable ? 
1e-2 : 1e-5, 1e-1, true);
+  }
+}
 }  // namespace mindspore::lite::opencl::test

From af5f050b92ef688be19fd1da9eb7cd000d5e2593 Mon Sep 17 00:00:00 2001
From: zuochuanyong
Date: Sat, 16 Jan 2021 02:31:13 -0500
Subject: [PATCH 10/10] Support parameter broadcast in data parallel mode under PyNative

---
 mindspore/common/api.py | 85 +++++++++++++++++++++++------------------
 mindspore/nn/cell.py    |  7 ++++
 2 files changed, 55 insertions(+), 37 deletions(-)

diff --git a/mindspore/common/api.py b/mindspore/common/api.py
index 89b0288b86e..9ae087aa70b 100644
--- a/mindspore/common/api.py
+++ b/mindspore/common/api.py
@@ -298,6 +298,49 @@ def _generate_pip_args(obj, *args, method="construct"):
     return args_names, args_list


+def _get_auto_split_param_names(parameter_layout_dict):
+    auto_split_params = {}
+    for key, value in parameter_layout_dict.items():
+        for dim in value[1]:
+            if dim != -1:
+                auto_split_params[key] = value
+                break
+    auto_split_param_names = (param_name for param_name in auto_split_params)
+    return auto_split_param_names
+
+
+def _build_broadcast_graph(broadcast_params_dict, broadcast_phase):
+    """Build broadcast graph."""
+    from mindspore.nn.wrap.cell_wrapper import _BroadCastCell
+
+    if not broadcast_params_dict:
+        broadcast_params_dict = {}
+    broadcast_params = []
+    for param in broadcast_params_dict.values():
+        broadcast_params.append(Tensor(param.asnumpy()))
+    _broadcast_net = _BroadCastCell(broadcast_params)
+    _broadcast_net.phase = broadcast_phase
+    broadcasted_params = _broadcast_net()
+    for param_name, param in zip(broadcast_params_dict.keys(), broadcasted_params):
+        broadcast_params_dict[param_name].set_data(param)
+
+
+def _parameter_broadcast(obj, auto_parallel_mode):
+    """Parameter broadcast."""
+    auto_split_param_names = []
+    if auto_parallel_mode:
+        auto_split_param_names = _get_auto_split_param_names(obj.parameter_layout_dict)
+
+    broadcast_params_dict = obj.parameters_broadcast_dict()
+    if auto_split_param_names and broadcast_params_dict:
+        broadcast_params_dict = OrderedDict()
+        for param_name, param in obj.parameters_broadcast_dict().items():
+            if param_name not in auto_split_param_names:
+                broadcast_params_dict[param_name] = param
+    broadcast_phase = "_broadcast_subgraph"
+    _build_broadcast_graph(broadcast_params_dict, broadcast_phase)
+
+
 class _PynativeExecutor:
     """
     An pynative executor used to compile/manage/run graph.
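The helpers added in the hunk above skip every parameter that the auto-parallel planner has already split across devices: a parameter counts as split when its tensor map (value[1] in parameter_layout_dict) contains a dimension other than -1, and such parameters are dropped from the broadcast dictionary. A minimal standalone sketch of that filtering follows; the layout values and parameter names are illustrative assumptions, not taken from a real network, and split_param_names is a hypothetical helper rather than the patch's own function.

# Illustrative sketch only, not part of the patch.
from collections import OrderedDict


def split_param_names(parameter_layout_dict):
    """Names whose tensor map (layout[1]) has any dim != -1, i.e. parameters auto-parallel already slices."""
    names = set()
    for name, layout in parameter_layout_dict.items():
        tensor_map = layout[1]
        if any(dim != -1 for dim in tensor_map):
            names.add(name)
    return names


# Hypothetical layouts: fc.weight is sliced along its first axis, fc.bias is replicated on every device.
layouts = {
    "fc.weight": ([8], [0, -1]),
    "fc.bias": ([8], [-1]),
}
params_to_broadcast = OrderedDict(
    (name, name) for name in layouts if name not in split_param_names(layouts)
)
print(list(params_to_broadcast))  # ['fc.bias'] -- only replicated parameters are broadcast

Note that the patch itself builds a generator of names; the sketch uses a set so repeated membership tests behave predictably, which is a deliberate simplification rather than a faithful copy.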
@@ -339,6 +382,10 @@ class _PynativeExecutor: def leave_construct(self, cell): self._executor.leave_construct(cell) + def parameter_broadcast(self, obj, phase, auto_parallel_mode): + if BROADCAST_PHASE not in phase and _get_parameter_broadcast(): + _parameter_broadcast(obj, auto_parallel_mode) + def __call__(self, obj, *args, **kwargs): args = args + tuple(kwargs.values()) return self._executor(obj, args, "") @@ -391,31 +438,6 @@ class _Executor: def _build_data_graph(self, obj, phase): self._executor.build_data_graph(obj.parameters_dict(), phase, obj.parameters_broadcast_dict()) - def _get_auto_split_param_names(self, parameter_layout_dict): - auto_split_params = {} - for key, value in parameter_layout_dict.items(): - for dim in value[1]: - if dim != -1: - auto_split_params[key] = value - break - auto_split_param_names = (param_name for param_name in auto_split_params) - return auto_split_param_names - - def _build_broadcast_graph(self, broadcast_params_dict, broadcast_phase): - """Build broadcast graph.""" - from mindspore.nn.wrap.cell_wrapper import _BroadCastCell - - if not broadcast_params_dict: - broadcast_params_dict = {} - broadcast_params = [] - for param in broadcast_params_dict.values(): - broadcast_params.append(Tensor(param.asnumpy())) - _broadcast_net = _BroadCastCell(broadcast_params) - _broadcast_net.phase = broadcast_phase - broadcasted_params = _broadcast_net() - for param_name, param in zip(broadcast_params_dict.keys(), broadcasted_params): - broadcast_params_dict[param_name].set_data(param) - def _set_dataset_mode(self, args_list): """set dataset mode.""" # decide whether to sink based on whether the inputs is virtual or args_list is () @@ -500,18 +522,7 @@ class _Executor: elif not enable_ge and "export" in phase: self._build_data_graph(obj, phase) elif BROADCAST_PHASE not in phase and _get_parameter_broadcast(): - auto_split_param_names = [] - if auto_parallel_mode: - auto_split_param_names = self._get_auto_split_param_names(obj.parameter_layout_dict) - - broadcast_params_dict = obj.parameters_broadcast_dict() - if auto_split_param_names and broadcast_params_dict: - broadcast_params_dict = OrderedDict() - for param_name, param in obj.parameters_broadcast_dict().items(): - if param_name not in auto_split_param_names: - broadcast_params_dict[param_name] = param - broadcast_phase = "_broadcast_subgraph" - self._build_broadcast_graph(broadcast_params_dict, broadcast_phase) + _parameter_broadcast(obj, auto_parallel_mode) return phase, True diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index 370941a4e7d..442457b0684 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -23,6 +23,7 @@ import numpy from mindspore import log as logger from mindspore.common.parameter import PARAMETER_NAME_DEFAULT +from mindspore.context import ParallelMode from .. 
import context from .._c_expression import init_pipeline, Cell_ from .._checkparam import Validator @@ -90,6 +91,7 @@ class Cell(Cell_): self._parameter_layout_dict = {} self._create_time = int(time.time() * 1e9) self.phase_prefix = "" + self.parameter_broadcast_done = False init_pipeline() # call gc to release GE session resources used by non-used cell objects @@ -300,6 +302,11 @@ class Cell(Cell_): out = self.compile_and_run(*inputs) return out + if context.get_auto_parallel_context("parallel_mode") == ParallelMode.DATA_PARALLEL: + if not self.parameter_broadcast_done: + _pynative_exec.parameter_broadcast(self, self.phase, self._auto_parallel_mode) + self.parameter_broadcast_done = True + for item in inputs: if isinstance(item, numpy.ndarray): raise TypeError("cell inputs should not be numpy array.")
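Taken together, the cell.py hunk above makes Cell.__call__ trigger a one-time parameter broadcast on its first invocation when the parallel mode is DATA_PARALLEL under PyNative. A rough usage sketch of a script that would exercise this new path is below; the network, tensor shapes, and launch command are illustrative assumptions rather than part of the patch, and it presumes a working distributed backend on each rank.

# Illustrative sketch only, not part of the patch.
# Launch with a distributed runner, e.g.: mpirun -n 8 python broadcast_demo.py
import numpy as np
from mindspore import Tensor, context, nn
from mindspore.context import ParallelMode
from mindspore.communication.management import init

context.set_context(mode=context.PYNATIVE_MODE)
init()  # set up the collective-communication backend for this rank
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                  parameter_broadcast=True)

net = nn.Dense(16, 4)  # any Cell; its weights may start out different on each rank
x = Tensor(np.ones((2, 16), np.float32))

# The first call enters Cell.__call__, sees DATA_PARALLEL mode with
# parameter_broadcast_done still False, broadcasts the parameters once via
# _pynative_exec.parameter_broadcast, and only then runs the network.
out = net(x)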