!11276 【MS】【LITE】【GPU】 Reduce OpenCL .so size (0.5M)

From: @wangdongxu6
Reviewed-by: @ddwsky
Signed-off-by: @ddwsky
This commit is contained in:
mindspore-ci-bot 2021-01-15 14:04:16 +08:00 committed by Gitee
commit 8e9086bbe6
7 changed files with 160 additions and 154 deletions

View File

@ -1,9 +1,9 @@
cmake_minimum_required(VERSION 3.14) cmake_minimum_required(VERSION 3.14)
project (Lite) project(Lite)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0)
message(FATAL_ERROR "GCC vesion ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0") message(FATAL_ERROR "GCC vesion ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0")
endif () endif()
option(MS_VERSION_MAJOR "major version" 0) option(MS_VERSION_MAJOR "major version" 0)
option(MS_VERSION_MINOR "minor version" 7) option(MS_VERSION_MINOR "minor version" 7)
@ -28,12 +28,14 @@ set(DIR_PREFIX mindspore-lite)
set(MS_VERSION ${MS_VERSION_MAJOR}.${MS_VERSION_MINOR}.${MS_VERSION_REVISION}) set(MS_VERSION ${MS_VERSION_MAJOR}.${MS_VERSION_MINOR}.${MS_VERSION_REVISION})
set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION}) set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} \
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} \
-DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full") set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full")
set(BUILD_LITE "on") set(BUILD_LITE "on")
set(PLATFORM_ARM "off") set(PLATFORM_ARM "off")
if (PLATFORM_ARM64 OR PLATFORM_ARM32) if(PLATFORM_ARM64 OR PLATFORM_ARM32)
set(PLATFORM_ARM "on") set(PLATFORM_ARM "on")
#set for cross-compiling toolchain #set for cross-compiling toolchain
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
@ -41,19 +43,19 @@ if (PLATFORM_ARM64 OR PLATFORM_ARM32)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
endif() endif()
if (SUPPORT_GPU) if(SUPPORT_GPU)
set(PROCESS_UNIT gpu) set(PROCESS_UNIT gpu)
elseif (SUPPORT_NPU) elseif(SUPPORT_NPU)
set(PROCESS_UNIT npu) set(PROCESS_UNIT npu)
else () else()
set(PROCESS_UNIT cpu) set(PROCESS_UNIT cpu)
endif () endif()
if (SUPPORT_NPU) if(SUPPORT_NPU)
set(DDK_PATH "$ENV{HWHIAI_DDK}/ddk/ai_ddk_lib") set(DDK_PATH "$ENV{HWHIAI_DDK}/ddk/ai_ddk_lib")
if (PLATFORM_ARM64) if(PLATFORM_ARM64)
set(DDK_LIB_PATH ${DDK_PATH}/lib64) set(DDK_LIB_PATH ${DDK_PATH}/lib64)
elseif (PLATFORM_ARM32) elseif(PLATFORM_ARM32)
set(DDK_LIB_PATH ${DDK_PATH}/lib) set(DDK_LIB_PATH ${DDK_PATH}/lib)
endif() endif()
add_compile_definitions(SUPPORT_NPU) add_compile_definitions(SUPPORT_NPU)
@ -62,39 +64,39 @@ endif()
add_compile_definitions(NO_DLIB) add_compile_definitions(NO_DLIB)
add_compile_options(-fPIC) add_compile_options(-fPIC)
if (SUPPORT_TRAIN) if(SUPPORT_TRAIN)
if (PLATFORM_ARM64) if(PLATFORM_ARM64)
set(RUNTIME_COMPONENT_NAME train-android-aarch64) set(RUNTIME_COMPONENT_NAME train-android-aarch64)
elseif (PLATFORM_ARM32) elseif(PLATFORM_ARM32)
set(RUNTIME_COMPONENT_NAME train-android-aarch32) set(RUNTIME_COMPONENT_NAME train-android-aarch32)
elseif (WIN32) elseif(WIN32)
set(RUNTIME_COMPONENT_NAME train-win-x64) set(RUNTIME_COMPONENT_NAME train-win-x64)
set(CONVERTER_COMPONENT_NAME train-converter-win-x64) set(CONVERTER_COMPONENT_NAME train-converter-win-x64)
else () else()
set(RUNTIME_COMPONENT_NAME train-linux-x64) set(RUNTIME_COMPONENT_NAME train-linux-x64)
set(CONVERTER_COMPONENT_NAME train-converter-linux-x64) set(CONVERTER_COMPONENT_NAME train-converter-linux-x64)
endif () endif()
else () else()
if (PLATFORM_ARM64) if(PLATFORM_ARM64)
set(RUNTIME_COMPONENT_NAME inference-android-aarch64) set(RUNTIME_COMPONENT_NAME inference-android-aarch64)
elseif (PLATFORM_ARM32) elseif(PLATFORM_ARM32)
set(RUNTIME_COMPONENT_NAME inference-android-aarch32) set(RUNTIME_COMPONENT_NAME inference-android-aarch32)
elseif (WIN32) elseif(WIN32)
if ("${X86_64_SIMD}" STREQUAL "off") if("${X86_64_SIMD}" STREQUAL "off")
set(RUNTIME_COMPONENT_NAME inference-win-x64) set(RUNTIME_COMPONENT_NAME inference-win-x64)
else () else()
set(RUNTIME_COMPONENT_NAME inference-win-x64-${X86_64_SIMD}) set(RUNTIME_COMPONENT_NAME inference-win-x64-${X86_64_SIMD})
endif() endif()
set(CONVERTER_COMPONENT_NAME converter-win-x64) set(CONVERTER_COMPONENT_NAME converter-win-x64)
else () else()
if ("${X86_64_SIMD}" STREQUAL "off") if("${X86_64_SIMD}" STREQUAL "off")
set(RUNTIME_COMPONENT_NAME inference-linux-x64) set(RUNTIME_COMPONENT_NAME inference-linux-x64)
else () else()
set(RUNTIME_COMPONENT_NAME inference-linux-x64-${X86_64_SIMD}) set(RUNTIME_COMPONENT_NAME inference-linux-x64-${X86_64_SIMD})
endif() endif()
set(CONVERTER_COMPONENT_NAME converter-linux-x64) set(CONVERTER_COMPONENT_NAME converter-linux-x64)
endif() endif()
endif () endif()
string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}) string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(CORE_DIR ${TOP_DIR}/mindspore/core) set(CORE_DIR ${TOP_DIR}/mindspore/core)
@ -112,140 +114,143 @@ include(${TOP_DIR}/cmake/utils.cmake)
include(${TOP_DIR}/cmake/dependency_utils.cmake) include(${TOP_DIR}/cmake/dependency_utils.cmake)
include(${TOP_DIR}/cmake/dependency_securec.cmake) include(${TOP_DIR}/cmake/dependency_securec.cmake)
include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake) include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake)
if (SUPPORT_GPU) if(SUPPORT_GPU)
include(${TOP_DIR}/cmake/external_libs/opencl.cmake) include(${TOP_DIR}/cmake/external_libs/opencl.cmake)
endif() endif()
if (ENABLE_CONVERTER OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") if(ENABLE_CONVERTER OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
include(${TOP_DIR}/cmake/external_libs/json.cmake) include(${TOP_DIR}/cmake/external_libs/json.cmake)
endif() endif()
file(GLOB FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/*.fbs) file(GLOB FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/*.fbs)
ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_src ${CMAKE_BINARY_DIR}/schema "") ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_src ${CMAKE_BINARY_DIR}/schema "")
ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_inner_src ${CMAKE_BINARY_DIR}/schema/inner "inner") ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_inner_src ${CMAKE_BINARY_DIR}/schema/inner
"inner")
string(REPLACE "-g" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") string(REPLACE "-g" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REPLACE "-g" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "-g" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default")
else () else()
## enable for binscope for release ## enable for binscope for release
set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}") set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \
set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}") -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}")
if (NOT WIN32) set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \
-Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}")
if(NOT WIN32)
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}") set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
endif () endif()
endif () endif()
if (ENABLE_VERBOSE) if(ENABLE_VERBOSE)
set(CMAKE_VERBOSE_MAKEFILE on) set(CMAKE_VERBOSE_MAKEFILE on)
endif () endif()
if (SUPPORT_TRAIN) if(SUPPORT_TRAIN)
add_compile_definitions(SUPPORT_TRAIN) add_compile_definitions(SUPPORT_TRAIN)
endif () endif()
if (ENABLE_NEON) if(ENABLE_NEON)
add_compile_definitions(ENABLE_NEON) add_compile_definitions(ENABLE_NEON)
endif () endif()
if (ENABLE_FP16) if(ENABLE_FP16)
add_compile_definitions(ENABLE_FP16) add_compile_definitions(ENABLE_FP16)
endif () endif()
if (SUPPORT_GPU) if(SUPPORT_GPU)
gene_opencl(${CMAKE_CURRENT_SOURCE_DIR}) gene_opencl(${CMAKE_CURRENT_SOURCE_DIR})
add_definitions(-DUSE_OPENCL_WRAPPER) add_definitions(-DUSE_OPENCL_WRAPPER)
add_definitions(-DMS_OPENCL_PROFILE=false) add_definitions(-DMS_OPENCL_PROFILE=false)
add_definitions(-DCL_TARGET_OPENCL_VERSION=200) add_definitions(-DCL_TARGET_OPENCL_VERSION=200)
add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=200) add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=120)
add_definitions(-DCL_HPP_MINIMUM_OPENCL_VERSION=110) add_definitions(-DCL_HPP_MINIMUM_OPENCL_VERSION=120)
add_compile_definitions(SUPPORT_GPU) add_compile_definitions(SUPPORT_GPU)
if (OFFLINE_COMPILE) if(OFFLINE_COMPILE)
add_compile_definitions(PROGRAM_WITH_IL) add_compile_definitions(PROGRAM_WITH_IL)
endif () endif()
include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-headers-src/) include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-headers-src/)
include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-clhpp-src/include) include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-clhpp-src/include)
endif () endif()
if (WIN32) if(WIN32)
add_compile_definitions(LITE_EXPORTS) add_compile_definitions(LITE_EXPORTS)
add_compile_definitions(BUILDING_DLL) add_compile_definitions(BUILDING_DLL)
endif () endif()
if (ENABLE_CONVERTER) if(ENABLE_CONVERTER)
if (PLATFORM_ARM) if(PLATFORM_ARM)
MESSAGE(FATAL_ERROR "Cannot build converter in arm platform") MESSAGE(FATAL_ERROR "Cannot build converter in arm platform")
endif () endif()
include_directories(${PYTHON_INCLUDE_DIRS}) include_directories(${PYTHON_INCLUDE_DIRS})
include(${TOP_DIR}/cmake/external_libs/eigen.cmake) include(${TOP_DIR}/cmake/external_libs/eigen.cmake)
include(${TOP_DIR}/cmake/external_libs/protobuf.cmake) include(${TOP_DIR}/cmake/external_libs/protobuf.cmake)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter)
endif () endif()
if (PLATFORM_ARM32 OR PLATFORM_ARM64) if(PLATFORM_ARM32 OR PLATFORM_ARM64)
if (NOT DEFINED ENV{ANDROID_NDK}) if(NOT DEFINED ENV{ANDROID_NDK})
message(FATAL_ERROR "env ANDROID_NDK should be setted for ARM compile") message(FATAL_ERROR "env ANDROID_NDK should be setted for ARM compile")
endif () endif()
add_compile_definitions(ENABLE_ARM) add_compile_definitions(ENABLE_ARM)
endif () endif()
if (PLATFORM_ARM32) if(PLATFORM_ARM32)
add_definitions(-mfloat-abi=softfp -mfpu=neon) add_definitions(-mfloat-abi=softfp -mfpu=neon)
add_compile_definitions(ENABLE_ARM32) add_compile_definitions(ENABLE_ARM32)
endif () endif()
if (PLATFORM_ARM64) if(PLATFORM_ARM64)
add_compile_definitions(ENABLE_ARM64) add_compile_definitions(ENABLE_ARM64)
if (ENABLE_FP16) if(ENABLE_FP16)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
endif () endif()
endif () endif()
if (PLATFORM_ARM32 OR PLATFORM_ARM64) if(PLATFORM_ARM32 OR PLATFORM_ARM64)
if (ENABLE_CONVERTER) if(ENABLE_CONVERTER)
set(BUILD_MINDDATA "off") set(BUILD_MINDDATA "off")
endif() endif()
endif() endif()
if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64) if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
if ("${X86_64_SIMD}" STREQUAL "sse") if("${X86_64_SIMD}" STREQUAL "sse")
add_compile_definitions(ENABLE_SSE) add_compile_definitions(ENABLE_SSE)
endif () endif()
if ("${X86_64_SIMD}" STREQUAL "avx") if("${X86_64_SIMD}" STREQUAL "avx")
add_compile_definitions(ENABLE_SSE) add_compile_definitions(ENABLE_SSE)
add_compile_definitions(ENABLE_AVX) add_compile_definitions(ENABLE_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mfma") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mfma")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mfma") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mfma")
endif () endif()
endif () endif()
if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") if(BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
add_compile_definitions(ENABLE_ANDROID) add_compile_definitions(ENABLE_ANDROID)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
endif () endif()
if (BUILD_MINDDATA STREQUAL "lite_cv") if(BUILD_MINDDATA STREQUAL "lite_cv")
add_compile_definitions(ENABLE_ANDROID) add_compile_definitions(ENABLE_ANDROID)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
endif () endif()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl)
if (ENABLE_TOOLS) if(ENABLE_TOOLS)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
if (SUPPORT_TRAIN) if(SUPPORT_TRAIN)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark_train) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark_train)
endif () endif()
endif () endif()
if (NOT WIN32) if(NOT WIN32)
if (ENABLE_TOOLS) if(ENABLE_TOOLS)
if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64) if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/schema_gen) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/schema_gen)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/cropper) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/cropper)
endif () endif()
endif () endif()
if (BUILD_TESTCASES) if(BUILD_TESTCASES)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
endif () endif()
endif () endif()
include(${TOP_DIR}/cmake/package_lite.cmake) include(${TOP_DIR}/cmake/package_lite.cmake)

View File

@ -1,23 +1,20 @@
add_compile_definitions(USE_ANDROID_LOG) add_compile_definitions(USE_ANDROID_LOG)
if (ENABLE_V0) if(ENABLE_V0)
add_definitions(-DENABLE_V0) add_definitions(-DENABLE_V0)
endif() endif()
set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..) set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
include_directories(${LITE_DIR}/nnacl/) include_directories(${LITE_DIR}/nnacl/)
include_directories(${LITE_DIR}/nnacl/optimize) include_directories(${LITE_DIR}/nnacl/optimize)
if (PLATFORM_ARM32 OR PLATFORM_ARM64) if(PLATFORM_ARM32 OR PLATFORM_ARM64)
#for performance #for performance
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release") if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if (SUPPORT_GPU) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti") -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
else () -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions") endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions") endif()
endif ()
endif ()
endif ()
set(LITE_SRC set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
@ -42,7 +39,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/dequant.cc ${CMAKE_CURRENT_SOURCE_DIR}/dequant.cc
) )
if (SUPPORT_GPU) if(SUPPORT_GPU)
set(LITE_SRC set(LITE_SRC
${LITE_SRC} ${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_kernel.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_kernel.cc
@ -54,10 +51,10 @@ if (SUPPORT_GPU)
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_runtime.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_runtime.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_wrapper.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_wrapper.cc
) )
endif () endif()
if (SUPPORT_TRAIN) if(SUPPORT_TRAIN)
set(ANF_SRC set(ANF_SRC
${ANF_SRC} ${ANF_SRC}
) )
@ -70,7 +67,7 @@ if (SUPPORT_TRAIN)
${CMAKE_CURRENT_SOURCE_DIR}/train/train_model.cc ${CMAKE_CURRENT_SOURCE_DIR}/train/train_model.cc
${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc ${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
) )
endif () endif()
add_subdirectory(ops) add_subdirectory(ops)
add_subdirectory(runtime/kernel/arm) add_subdirectory(runtime/kernel/arm)
@ -85,53 +82,54 @@ set_target_properties(mindspore-lite_static PROPERTIES OUTPUT_NAME "mindspore-li
set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
if (SUPPORT_GPU) if(SUPPORT_GPU)
add_subdirectory(runtime/kernel/opencl) add_subdirectory(runtime/kernel/opencl)
target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl cpu_ops_mid) target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl cpu_ops_mid)
target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl_mid cpu_ops_mid) target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl_mid cpu_ops_mid)
else () else()
target_link_libraries(mindspore-lite cpu_kernel_mid nnacl cpu_ops_mid) target_link_libraries(mindspore-lite cpu_kernel_mid nnacl cpu_ops_mid)
target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl_mid cpu_ops_mid) target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl_mid cpu_ops_mid)
endif () endif()
if (SUPPORT_NPU) if(SUPPORT_NPU)
add_subdirectory(runtime/agent/npu) add_subdirectory(runtime/agent/npu)
include_directories(${DDK_PATH}) include_directories(${DDK_PATH})
target_link_libraries(mindspore-lite npu_kernel_mid) target_link_libraries(mindspore-lite npu_kernel_mid)
target_link_libraries(mindspore-lite_static npu_kernel_mid) target_link_libraries(mindspore-lite_static npu_kernel_mid)
endif () endif()
if (PLATFORM_ARM32 OR PLATFORM_ARM64) if(PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(mindspore-lite log) target_link_libraries(mindspore-lite log)
target_link_libraries(mindspore-lite_static log) target_link_libraries(mindspore-lite_static log)
endif () endif()
if (BUILD_MINDDATA STREQUAL "lite") if(BUILD_MINDDATA STREQUAL "lite")
target_link_libraries(mindspore-lite minddata_eager_mid minddata-lite) target_link_libraries(mindspore-lite minddata_eager_mid minddata-lite)
target_link_libraries(mindspore-lite_static minddata_eager_mid) target_link_libraries(mindspore-lite_static minddata_eager_mid)
endif () endif()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND PLATFORM_ARM) if(PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD set(NDK_STRIP
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip "${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip")
endif()
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
${CMAKE_BINARY_DIR}/src/libmindspore-lite.so) ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
endif () endif()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release") if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if (PLATFORM_ARM) if(PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
${CMAKE_BINARY_DIR}/src/libmindspore-lite.so) ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
elseif (NOT WIN32) elseif(NOT WIN32)
add_custom_command(TARGET mindspore-lite POST_BUILD add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so) endif()
endif () endif()
endif ()
########################## build optimize and float16 library #################################3 ########################## build optimize and float16 library #################################
if (PLATFORM_ARM64) if(PLATFORM_ARM64)
target_link_libraries(mindspore-lite cpu_opt_kernel_mid nnacl_optimize_mid) target_link_libraries(mindspore-lite cpu_opt_kernel_mid nnacl_optimize_mid)
target_link_libraries(mindspore-lite_static cpu_opt_kernel_mid nnacl_optimize_mid) target_link_libraries(mindspore-lite_static cpu_opt_kernel_mid nnacl_optimize_mid)
if (ENABLE_FP16) if(ENABLE_FP16)
target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid) target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid)
target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid) target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid)
endif () endif()
endif () endif()

View File

@ -94,11 +94,14 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img
MS_ASSERT(buffer); MS_ASSERT(buffer);
MS_ASSERT(image); MS_ASSERT(image);
MS_ASSERT(img_size.size() == 3); MS_ASSERT(img_size.size() == 3);
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
if (data == nullptr) { if (data == nullptr) {
*image = new (std::nothrow) // copy from cl2.hpp
cl::Image2D(*ocl_runtime_->Context(), image_format, **buffer, img_size[0], img_size[1], 0, &ret); cl_image_desc desc = {CL_MEM_OBJECT_IMAGE2D, img_size[0], img_size[1], 0, 0, 0, 0, 0, 0, (**buffer).get()};
const cl::Context &context = *ocl_runtime_->Context();
cl_image_format image_format{CL_RGBA, static_cast<uint32_t>(img_size[2])};
*image = new (std::nothrow) cl::Image2D(clCreateImage(context.get(), 0, &image_format, &desc, nullptr, &ret));
} else { } else {
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
*image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, *image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
image_format, img_size[0], img_size[1], 0, data, &ret); image_format, img_size[0], img_size[1], 0, data, &ret);
} }

View File

@ -562,7 +562,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
if (command_queue == nullptr) { if (command_queue == nullptr) {
command_queue = default_command_queue_; command_queue = default_command_queue_;
} }
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size); return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr);
} }
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region, void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
@ -591,7 +591,7 @@ int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue)
if (command_queue == nullptr) { if (command_queue == nullptr) {
command_queue = default_command_queue_; command_queue = default_command_queue_;
} }
return command_queue->enqueueUnmapSVM(host_ptr); return clEnqueueSVMUnmap(command_queue->get(), host_ptr, 0, nullptr, nullptr);
} }
bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) { bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {

View File

@ -83,7 +83,7 @@ class OpenCLRuntime {
auto svm_capabilities = GetSVMCapabilities(); auto svm_capabilities = GetSVMCapabilities();
if (svm_capabilities) { if (svm_capabilities) {
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value; MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
return kernel.setArg(index, value); return clSetKernelArgSVMPointer(kernel.get(), index, value);
} }
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value)); cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value; MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;

View File

@ -142,13 +142,13 @@ bool LoadLibraryFromPath(const std::string &library_path, void **handle_ptr) {
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImage); LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyBufferToImage); LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyBufferToImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImageToBuffer); LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_TARGET_OPENCL_VERSION >= 120
LOAD_OPENCL_FUNCTION_PTR(clRetainDevice); LOAD_OPENCL_FUNCTION_PTR(clRetainDevice);
LOAD_OPENCL_FUNCTION_PTR(clReleaseDevice); LOAD_OPENCL_FUNCTION_PTR(clReleaseDevice);
LOAD_OPENCL_FUNCTION_PTR(clCreateImage); LOAD_OPENCL_FUNCTION_PTR(clCreateImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueFillImage); LOAD_OPENCL_FUNCTION_PTR(clEnqueueFillImage);
#endif #endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_TARGET_OPENCL_VERSION >= 200
LOAD_OPENCL_FUNCTION_PTR(clCreateCommandQueueWithProperties); LOAD_OPENCL_FUNCTION_PTR(clCreateCommandQueueWithProperties);
LOAD_OPENCL_FUNCTION_PTR(clGetExtensionFunctionAddress); LOAD_OPENCL_FUNCTION_PTR(clGetExtensionFunctionAddress);
LOAD_OPENCL_FUNCTION_PTR(clSVMAlloc); LOAD_OPENCL_FUNCTION_PTR(clSVMAlloc);
@ -232,13 +232,13 @@ CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo);
CL_DEFINE_FUNC_PTR(clGetImageInfo); CL_DEFINE_FUNC_PTR(clGetImageInfo);
CL_DEFINE_FUNC_PTR(clEnqueueCopyBufferToImage); CL_DEFINE_FUNC_PTR(clEnqueueCopyBufferToImage);
CL_DEFINE_FUNC_PTR(clEnqueueCopyImageToBuffer); CL_DEFINE_FUNC_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_TARGET_OPENCL_VERSION >= 120
CL_DEFINE_FUNC_PTR(clRetainDevice); CL_DEFINE_FUNC_PTR(clRetainDevice);
CL_DEFINE_FUNC_PTR(clReleaseDevice); CL_DEFINE_FUNC_PTR(clReleaseDevice);
CL_DEFINE_FUNC_PTR(clCreateImage); CL_DEFINE_FUNC_PTR(clCreateImage);
CL_DEFINE_FUNC_PTR(clEnqueueFillImage); CL_DEFINE_FUNC_PTR(clEnqueueFillImage);
#endif #endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_TARGET_OPENCL_VERSION >= 200
CL_DEFINE_FUNC_PTR(clGetKernelSubGroupInfoKHR); CL_DEFINE_FUNC_PTR(clGetKernelSubGroupInfoKHR);
CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties); CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties);
CL_DEFINE_FUNC_PTR(clGetExtensionFunctionAddress); CL_DEFINE_FUNC_PTR(clGetExtensionFunctionAddress);
@ -651,7 +651,7 @@ cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue, cl_mem src_ima
event_wait_list, event); event_wait_list, event);
} }
#if CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_TARGET_OPENCL_VERSION >= 120
// clRetainDevice wrapper, use OpenCLWrapper function. // clRetainDevice wrapper, use OpenCLWrapper function.
cl_int clRetainDevice(cl_device_id device) { cl_int clRetainDevice(cl_device_id device) {
@ -685,7 +685,7 @@ cl_int clEnqueueFillImage(cl_command_queue command_queue, cl_mem image, const vo
#endif #endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_TARGET_OPENCL_VERSION >= 200
// clCreateCommandQueueWithProperties wrapper, use OpenCLWrapper function. // clCreateCommandQueueWithProperties wrapper, use OpenCLWrapper function.
cl_command_queue clCreateCommandQueueWithProperties(cl_context context, cl_device_id device, cl_command_queue clCreateCommandQueueWithProperties(cl_context context, cl_device_id device,

View File

@ -110,7 +110,7 @@ using clEnqueueCopyBufferToImageFunc = cl_int(CL_API_CALL *)(cl_command_queue, c
using clEnqueueCopyImageToBufferFunc = cl_int(CL_API_CALL *)(cl_command_queue, cl_mem, cl_mem, const size_t *, using clEnqueueCopyImageToBufferFunc = cl_int(CL_API_CALL *)(cl_command_queue, cl_mem, cl_mem, const size_t *,
const size_t *, size_t, cl_uint, const cl_event *, const size_t *, size_t, cl_uint, const cl_event *,
cl_event *); cl_event *);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_TARGET_OPENCL_VERSION >= 120
using clRetainDeviceFunc = cl_int (*)(cl_device_id); using clRetainDeviceFunc = cl_int (*)(cl_device_id);
using clReleaseDeviceFunc = cl_int (*)(cl_device_id); using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_format *, const cl_image_desc *, void *, using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_format *, const cl_image_desc *, void *,
@ -118,7 +118,7 @@ using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_fo
using clEnqueueFillImageFunc = cl_int (*)(cl_command_queue, cl_mem, const void *, const size_t *, const size_t *, using clEnqueueFillImageFunc = cl_int (*)(cl_command_queue, cl_mem, const void *, const size_t *, const size_t *,
cl_uint, const cl_event *, cl_event *); cl_uint, const cl_event *, cl_event *);
#endif #endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_TARGET_OPENCL_VERSION >= 200
using clCreateProgramWithILFunc = cl_program (*)(cl_context, const void *, size_t, cl_int *); using clCreateProgramWithILFunc = cl_program (*)(cl_context, const void *, size_t, cl_int *);
using clSVMAllocFunc = void *(*)(cl_context, cl_mem_flags, size_t size, cl_uint); using clSVMAllocFunc = void *(*)(cl_context, cl_mem_flags, size_t size, cl_uint);
using clSVMFreeFunc = void (*)(cl_context, void *); using clSVMFreeFunc = void (*)(cl_context, void *);
@ -185,13 +185,13 @@ CL_DECLARE_FUNC_PTR(clGetEventProfilingInfo);
CL_DECLARE_FUNC_PTR(clGetImageInfo); CL_DECLARE_FUNC_PTR(clGetImageInfo);
CL_DECLARE_FUNC_PTR(clEnqueueCopyBufferToImage); CL_DECLARE_FUNC_PTR(clEnqueueCopyBufferToImage);
CL_DECLARE_FUNC_PTR(clEnqueueCopyImageToBuffer); CL_DECLARE_FUNC_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_TARGET_OPENCL_VERSION >= 120
CL_DECLARE_FUNC_PTR(clRetainDevice); CL_DECLARE_FUNC_PTR(clRetainDevice);
CL_DECLARE_FUNC_PTR(clReleaseDevice); CL_DECLARE_FUNC_PTR(clReleaseDevice);
CL_DECLARE_FUNC_PTR(clCreateImage); CL_DECLARE_FUNC_PTR(clCreateImage);
CL_DECLARE_FUNC_PTR(clEnqueueFillImage); CL_DECLARE_FUNC_PTR(clEnqueueFillImage);
#endif #endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_TARGET_OPENCL_VERSION >= 200
CL_DECLARE_FUNC_PTR(clGetKernelSubGroupInfoKHR); CL_DECLARE_FUNC_PTR(clGetKernelSubGroupInfoKHR);
CL_DECLARE_FUNC_PTR(clCreateCommandQueueWithProperties); CL_DECLARE_FUNC_PTR(clCreateCommandQueueWithProperties);
CL_DECLARE_FUNC_PTR(clGetExtensionFunctionAddress); CL_DECLARE_FUNC_PTR(clGetExtensionFunctionAddress);