forked from mindspore-Ecosystem/mindspore
!11276 【MS】【LITE】【GPU】 reduce opencl so size 0.5M
From: @wangdongxu6 Reviewed-by: @ddwsky Signed-off-by: @ddwsky
This commit is contained in:
commit
8e9086bbe6
|
@ -1,9 +1,9 @@
|
|||
cmake_minimum_required(VERSION 3.14)
|
||||
project (Lite)
|
||||
project(Lite)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0)
|
||||
message(FATAL_ERROR "GCC vesion ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
option(MS_VERSION_MAJOR "major version" 0)
|
||||
option(MS_VERSION_MINOR "minor version" 7)
|
||||
|
@ -28,12 +28,14 @@ set(DIR_PREFIX mindspore-lite)
|
|||
set(MS_VERSION ${MS_VERSION_MAJOR}.${MS_VERSION_MINOR}.${MS_VERSION_REVISION})
|
||||
set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} \
|
||||
-DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} \
|
||||
-DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
|
||||
set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full")
|
||||
set(BUILD_LITE "on")
|
||||
set(PLATFORM_ARM "off")
|
||||
if (PLATFORM_ARM64 OR PLATFORM_ARM32)
|
||||
if(PLATFORM_ARM64 OR PLATFORM_ARM32)
|
||||
set(PLATFORM_ARM "on")
|
||||
#set for cross-compiling toolchain
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
|
||||
|
@ -41,19 +43,19 @@ if (PLATFORM_ARM64 OR PLATFORM_ARM32)
|
|||
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
|
||||
endif()
|
||||
|
||||
if (SUPPORT_GPU)
|
||||
if(SUPPORT_GPU)
|
||||
set(PROCESS_UNIT gpu)
|
||||
elseif (SUPPORT_NPU)
|
||||
elseif(SUPPORT_NPU)
|
||||
set(PROCESS_UNIT npu)
|
||||
else ()
|
||||
else()
|
||||
set(PROCESS_UNIT cpu)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (SUPPORT_NPU)
|
||||
if(SUPPORT_NPU)
|
||||
set(DDK_PATH "$ENV{HWHIAI_DDK}/ddk/ai_ddk_lib")
|
||||
if (PLATFORM_ARM64)
|
||||
if(PLATFORM_ARM64)
|
||||
set(DDK_LIB_PATH ${DDK_PATH}/lib64)
|
||||
elseif (PLATFORM_ARM32)
|
||||
elseif(PLATFORM_ARM32)
|
||||
set(DDK_LIB_PATH ${DDK_PATH}/lib)
|
||||
endif()
|
||||
add_compile_definitions(SUPPORT_NPU)
|
||||
|
@ -62,39 +64,39 @@ endif()
|
|||
add_compile_definitions(NO_DLIB)
|
||||
add_compile_options(-fPIC)
|
||||
|
||||
if (SUPPORT_TRAIN)
|
||||
if (PLATFORM_ARM64)
|
||||
if(SUPPORT_TRAIN)
|
||||
if(PLATFORM_ARM64)
|
||||
set(RUNTIME_COMPONENT_NAME train-android-aarch64)
|
||||
elseif (PLATFORM_ARM32)
|
||||
elseif(PLATFORM_ARM32)
|
||||
set(RUNTIME_COMPONENT_NAME train-android-aarch32)
|
||||
elseif (WIN32)
|
||||
elseif(WIN32)
|
||||
set(RUNTIME_COMPONENT_NAME train-win-x64)
|
||||
set(CONVERTER_COMPONENT_NAME train-converter-win-x64)
|
||||
else ()
|
||||
else()
|
||||
set(RUNTIME_COMPONENT_NAME train-linux-x64)
|
||||
set(CONVERTER_COMPONENT_NAME train-converter-linux-x64)
|
||||
endif ()
|
||||
else ()
|
||||
if (PLATFORM_ARM64)
|
||||
endif()
|
||||
else()
|
||||
if(PLATFORM_ARM64)
|
||||
set(RUNTIME_COMPONENT_NAME inference-android-aarch64)
|
||||
elseif (PLATFORM_ARM32)
|
||||
elseif(PLATFORM_ARM32)
|
||||
set(RUNTIME_COMPONENT_NAME inference-android-aarch32)
|
||||
elseif (WIN32)
|
||||
if ("${X86_64_SIMD}" STREQUAL "off")
|
||||
elseif(WIN32)
|
||||
if("${X86_64_SIMD}" STREQUAL "off")
|
||||
set(RUNTIME_COMPONENT_NAME inference-win-x64)
|
||||
else ()
|
||||
else()
|
||||
set(RUNTIME_COMPONENT_NAME inference-win-x64-${X86_64_SIMD})
|
||||
endif()
|
||||
set(CONVERTER_COMPONENT_NAME converter-win-x64)
|
||||
else ()
|
||||
if ("${X86_64_SIMD}" STREQUAL "off")
|
||||
else()
|
||||
if("${X86_64_SIMD}" STREQUAL "off")
|
||||
set(RUNTIME_COMPONENT_NAME inference-linux-x64)
|
||||
else ()
|
||||
else()
|
||||
set(RUNTIME_COMPONENT_NAME inference-linux-x64-${X86_64_SIMD})
|
||||
endif()
|
||||
set(CONVERTER_COMPONENT_NAME converter-linux-x64)
|
||||
endif()
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set(CORE_DIR ${TOP_DIR}/mindspore/core)
|
||||
|
@ -112,140 +114,143 @@ include(${TOP_DIR}/cmake/utils.cmake)
|
|||
include(${TOP_DIR}/cmake/dependency_utils.cmake)
|
||||
include(${TOP_DIR}/cmake/dependency_securec.cmake)
|
||||
include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake)
|
||||
if (SUPPORT_GPU)
|
||||
if(SUPPORT_GPU)
|
||||
include(${TOP_DIR}/cmake/external_libs/opencl.cmake)
|
||||
endif()
|
||||
|
||||
if (ENABLE_CONVERTER OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
|
||||
if(ENABLE_CONVERTER OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
|
||||
include(${TOP_DIR}/cmake/external_libs/json.cmake)
|
||||
endif()
|
||||
|
||||
file(GLOB FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/*.fbs)
|
||||
ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_src ${CMAKE_BINARY_DIR}/schema "")
|
||||
ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_inner_src ${CMAKE_BINARY_DIR}/schema/inner "inner")
|
||||
ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_inner_src ${CMAKE_BINARY_DIR}/schema/inner
|
||||
"inner")
|
||||
|
||||
string(REPLACE "-g" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
string(REPLACE "-g" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default")
|
||||
else ()
|
||||
else()
|
||||
## enable for binscope for release
|
||||
set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}")
|
||||
if (NOT WIN32)
|
||||
set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \
|
||||
-Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \
|
||||
-Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}")
|
||||
if(NOT WIN32)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
|
||||
endif ()
|
||||
endif ()
|
||||
if (ENABLE_VERBOSE)
|
||||
endif()
|
||||
endif()
|
||||
if(ENABLE_VERBOSE)
|
||||
set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
endif ()
|
||||
if (SUPPORT_TRAIN)
|
||||
endif()
|
||||
if(SUPPORT_TRAIN)
|
||||
add_compile_definitions(SUPPORT_TRAIN)
|
||||
endif ()
|
||||
if (ENABLE_NEON)
|
||||
endif()
|
||||
if(ENABLE_NEON)
|
||||
add_compile_definitions(ENABLE_NEON)
|
||||
endif ()
|
||||
if (ENABLE_FP16)
|
||||
endif()
|
||||
if(ENABLE_FP16)
|
||||
add_compile_definitions(ENABLE_FP16)
|
||||
endif ()
|
||||
if (SUPPORT_GPU)
|
||||
endif()
|
||||
if(SUPPORT_GPU)
|
||||
gene_opencl(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_definitions(-DUSE_OPENCL_WRAPPER)
|
||||
add_definitions(-DMS_OPENCL_PROFILE=false)
|
||||
add_definitions(-DCL_TARGET_OPENCL_VERSION=200)
|
||||
add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=200)
|
||||
add_definitions(-DCL_HPP_MINIMUM_OPENCL_VERSION=110)
|
||||
add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=120)
|
||||
add_definitions(-DCL_HPP_MINIMUM_OPENCL_VERSION=120)
|
||||
add_compile_definitions(SUPPORT_GPU)
|
||||
if (OFFLINE_COMPILE)
|
||||
if(OFFLINE_COMPILE)
|
||||
add_compile_definitions(PROGRAM_WITH_IL)
|
||||
endif ()
|
||||
endif()
|
||||
include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-headers-src/)
|
||||
include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-clhpp-src/include)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
if(WIN32)
|
||||
add_compile_definitions(LITE_EXPORTS)
|
||||
add_compile_definitions(BUILDING_DLL)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (ENABLE_CONVERTER)
|
||||
if (PLATFORM_ARM)
|
||||
if(ENABLE_CONVERTER)
|
||||
if(PLATFORM_ARM)
|
||||
MESSAGE(FATAL_ERROR "Cannot build converter in arm platform")
|
||||
endif ()
|
||||
endif()
|
||||
include_directories(${PYTHON_INCLUDE_DIRS})
|
||||
include(${TOP_DIR}/cmake/external_libs/eigen.cmake)
|
||||
include(${TOP_DIR}/cmake/external_libs/protobuf.cmake)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
if (NOT DEFINED ENV{ANDROID_NDK})
|
||||
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
if(NOT DEFINED ENV{ANDROID_NDK})
|
||||
message(FATAL_ERROR "env ANDROID_NDK should be setted for ARM compile")
|
||||
endif ()
|
||||
endif()
|
||||
add_compile_definitions(ENABLE_ARM)
|
||||
endif ()
|
||||
if (PLATFORM_ARM32)
|
||||
endif()
|
||||
if(PLATFORM_ARM32)
|
||||
add_definitions(-mfloat-abi=softfp -mfpu=neon)
|
||||
add_compile_definitions(ENABLE_ARM32)
|
||||
endif ()
|
||||
if (PLATFORM_ARM64)
|
||||
endif()
|
||||
if(PLATFORM_ARM64)
|
||||
add_compile_definitions(ENABLE_ARM64)
|
||||
if (ENABLE_FP16)
|
||||
if(ENABLE_FP16)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
|
||||
endif ()
|
||||
endif ()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
if (ENABLE_CONVERTER)
|
||||
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
if(ENABLE_CONVERTER)
|
||||
set(BUILD_MINDDATA "off")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
|
||||
if ("${X86_64_SIMD}" STREQUAL "sse")
|
||||
if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
|
||||
if("${X86_64_SIMD}" STREQUAL "sse")
|
||||
add_compile_definitions(ENABLE_SSE)
|
||||
endif ()
|
||||
if ("${X86_64_SIMD}" STREQUAL "avx")
|
||||
endif()
|
||||
if("${X86_64_SIMD}" STREQUAL "avx")
|
||||
add_compile_definitions(ENABLE_SSE)
|
||||
add_compile_definitions(ENABLE_AVX)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mfma")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mfma")
|
||||
endif ()
|
||||
endif ()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
|
||||
if(BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
|
||||
add_compile_definitions(ENABLE_ANDROID)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if (BUILD_MINDDATA STREQUAL "lite_cv")
|
||||
if(BUILD_MINDDATA STREQUAL "lite_cv")
|
||||
add_compile_definitions(ENABLE_ANDROID)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl)
|
||||
if (ENABLE_TOOLS)
|
||||
if(ENABLE_TOOLS)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
|
||||
if (SUPPORT_TRAIN)
|
||||
if(SUPPORT_TRAIN)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark_train)
|
||||
endif ()
|
||||
endif ()
|
||||
if (NOT WIN32)
|
||||
if (ENABLE_TOOLS)
|
||||
if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
|
||||
endif()
|
||||
endif()
|
||||
if(NOT WIN32)
|
||||
if(ENABLE_TOOLS)
|
||||
if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/schema_gen)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/cropper)
|
||||
endif ()
|
||||
endif ()
|
||||
if (BUILD_TESTCASES)
|
||||
endif()
|
||||
endif()
|
||||
if(BUILD_TESTCASES)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
endif ()
|
||||
endif ()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(${TOP_DIR}/cmake/package_lite.cmake)
|
||||
|
||||
|
|
|
@ -1,23 +1,20 @@
|
|||
add_compile_definitions(USE_ANDROID_LOG)
|
||||
if (ENABLE_V0)
|
||||
add_definitions(-DENABLE_V0)
|
||||
if(ENABLE_V0)
|
||||
add_definitions(-DENABLE_V0)
|
||||
endif()
|
||||
set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
|
||||
include_directories(${LITE_DIR}/nnacl/)
|
||||
include_directories(${LITE_DIR}/nnacl/optimize)
|
||||
|
||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
#for performance
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
if (SUPPORT_GPU)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti")
|
||||
else ()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
|
||||
-fdata-sections -ffast-math -fno-rtti -fno-exceptions")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
|
||||
-fdata-sections -ffast-math -fno-rtti -fno-exceptions")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(LITE_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
|
||||
|
@ -42,7 +39,7 @@ set(LITE_SRC
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/dequant.cc
|
||||
)
|
||||
|
||||
if (SUPPORT_GPU)
|
||||
if(SUPPORT_GPU)
|
||||
set(LITE_SRC
|
||||
${LITE_SRC}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_kernel.cc
|
||||
|
@ -54,10 +51,10 @@ if (SUPPORT_GPU)
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_runtime.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_wrapper.cc
|
||||
)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
|
||||
if (SUPPORT_TRAIN)
|
||||
if(SUPPORT_TRAIN)
|
||||
set(ANF_SRC
|
||||
${ANF_SRC}
|
||||
)
|
||||
|
@ -70,7 +67,7 @@ if (SUPPORT_TRAIN)
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/train/train_model.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
|
||||
)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
add_subdirectory(ops)
|
||||
add_subdirectory(runtime/kernel/arm)
|
||||
|
@ -85,53 +82,54 @@ set_target_properties(mindspore-lite_static PROPERTIES OUTPUT_NAME "mindspore-li
|
|||
set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
|
||||
if (SUPPORT_GPU)
|
||||
if(SUPPORT_GPU)
|
||||
add_subdirectory(runtime/kernel/opencl)
|
||||
target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl cpu_ops_mid)
|
||||
target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl_mid cpu_ops_mid)
|
||||
else ()
|
||||
else()
|
||||
target_link_libraries(mindspore-lite cpu_kernel_mid nnacl cpu_ops_mid)
|
||||
target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl_mid cpu_ops_mid)
|
||||
endif ()
|
||||
if (SUPPORT_NPU)
|
||||
endif()
|
||||
if(SUPPORT_NPU)
|
||||
add_subdirectory(runtime/agent/npu)
|
||||
include_directories(${DDK_PATH})
|
||||
target_link_libraries(mindspore-lite npu_kernel_mid)
|
||||
target_link_libraries(mindspore-lite_static npu_kernel_mid)
|
||||
endif ()
|
||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
endif()
|
||||
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
target_link_libraries(mindspore-lite log)
|
||||
target_link_libraries(mindspore-lite_static log)
|
||||
endif ()
|
||||
if (BUILD_MINDDATA STREQUAL "lite")
|
||||
endif()
|
||||
if(BUILD_MINDDATA STREQUAL "lite")
|
||||
target_link_libraries(mindspore-lite minddata_eager_mid minddata-lite)
|
||||
target_link_libraries(mindspore-lite_static minddata_eager_mid)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND PLATFORM_ARM)
|
||||
add_custom_command(TARGET mindspore-lite POST_BUILD
|
||||
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
|
||||
if(PLATFORM_ARM)
|
||||
set(NDK_STRIP
|
||||
"${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip")
|
||||
endif()
|
||||
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND PLATFORM_ARM)
|
||||
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
|
||||
${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
if (PLATFORM_ARM)
|
||||
add_custom_command(TARGET mindspore-lite POST_BUILD
|
||||
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
if(PLATFORM_ARM)
|
||||
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
|
||||
${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
|
||||
elseif (NOT WIN32)
|
||||
add_custom_command(TARGET mindspore-lite POST_BUILD
|
||||
COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
|
||||
endif ()
|
||||
endif ()
|
||||
elseif(NOT WIN32)
|
||||
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
########################## build optimize and float16 library #################################3
|
||||
if (PLATFORM_ARM64)
|
||||
########################## build optimize and float16 library #################################
|
||||
if(PLATFORM_ARM64)
|
||||
target_link_libraries(mindspore-lite cpu_opt_kernel_mid nnacl_optimize_mid)
|
||||
target_link_libraries(mindspore-lite_static cpu_opt_kernel_mid nnacl_optimize_mid)
|
||||
if (ENABLE_FP16)
|
||||
if(ENABLE_FP16)
|
||||
target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid)
|
||||
target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -94,11 +94,14 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img
|
|||
MS_ASSERT(buffer);
|
||||
MS_ASSERT(image);
|
||||
MS_ASSERT(img_size.size() == 3);
|
||||
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
|
||||
if (data == nullptr) {
|
||||
*image = new (std::nothrow)
|
||||
cl::Image2D(*ocl_runtime_->Context(), image_format, **buffer, img_size[0], img_size[1], 0, &ret);
|
||||
// copy from cl2.hpp
|
||||
cl_image_desc desc = {CL_MEM_OBJECT_IMAGE2D, img_size[0], img_size[1], 0, 0, 0, 0, 0, 0, (**buffer).get()};
|
||||
const cl::Context &context = *ocl_runtime_->Context();
|
||||
cl_image_format image_format{CL_RGBA, static_cast<uint32_t>(img_size[2])};
|
||||
*image = new (std::nothrow) cl::Image2D(clCreateImage(context.get(), 0, &image_format, &desc, nullptr, &ret));
|
||||
} else {
|
||||
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
|
||||
*image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
image_format, img_size[0], img_size[1], 0, data, &ret);
|
||||
}
|
||||
|
|
|
@ -562,7 +562,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
|
|||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
|
||||
return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr);
|
||||
}
|
||||
|
||||
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
|
||||
|
@ -591,7 +591,7 @@ int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue)
|
|||
if (command_queue == nullptr) {
|
||||
command_queue = default_command_queue_;
|
||||
}
|
||||
return command_queue->enqueueUnmapSVM(host_ptr);
|
||||
return clEnqueueSVMUnmap(command_queue->get(), host_ptr, 0, nullptr, nullptr);
|
||||
}
|
||||
|
||||
bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
|
||||
|
|
|
@ -83,7 +83,7 @@ class OpenCLRuntime {
|
|||
auto svm_capabilities = GetSVMCapabilities();
|
||||
if (svm_capabilities) {
|
||||
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
|
||||
return kernel.setArg(index, value);
|
||||
return clSetKernelArgSVMPointer(kernel.get(), index, value);
|
||||
}
|
||||
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
|
||||
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;
|
||||
|
|
|
@ -142,13 +142,13 @@ bool LoadLibraryFromPath(const std::string &library_path, void **handle_ptr) {
|
|||
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImage);
|
||||
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyBufferToImage);
|
||||
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImageToBuffer);
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120
|
||||
LOAD_OPENCL_FUNCTION_PTR(clRetainDevice);
|
||||
LOAD_OPENCL_FUNCTION_PTR(clReleaseDevice);
|
||||
LOAD_OPENCL_FUNCTION_PTR(clCreateImage);
|
||||
LOAD_OPENCL_FUNCTION_PTR(clEnqueueFillImage);
|
||||
#endif
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
LOAD_OPENCL_FUNCTION_PTR(clCreateCommandQueueWithProperties);
|
||||
LOAD_OPENCL_FUNCTION_PTR(clGetExtensionFunctionAddress);
|
||||
LOAD_OPENCL_FUNCTION_PTR(clSVMAlloc);
|
||||
|
@ -232,13 +232,13 @@ CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo);
|
|||
CL_DEFINE_FUNC_PTR(clGetImageInfo);
|
||||
CL_DEFINE_FUNC_PTR(clEnqueueCopyBufferToImage);
|
||||
CL_DEFINE_FUNC_PTR(clEnqueueCopyImageToBuffer);
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120
|
||||
CL_DEFINE_FUNC_PTR(clRetainDevice);
|
||||
CL_DEFINE_FUNC_PTR(clReleaseDevice);
|
||||
CL_DEFINE_FUNC_PTR(clCreateImage);
|
||||
CL_DEFINE_FUNC_PTR(clEnqueueFillImage);
|
||||
#endif
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
CL_DEFINE_FUNC_PTR(clGetKernelSubGroupInfoKHR);
|
||||
CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties);
|
||||
CL_DEFINE_FUNC_PTR(clGetExtensionFunctionAddress);
|
||||
|
@ -651,7 +651,7 @@ cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue, cl_mem src_ima
|
|||
event_wait_list, event);
|
||||
}
|
||||
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120
|
||||
|
||||
// clRetainDevice wrapper, use OpenCLWrapper function.
|
||||
cl_int clRetainDevice(cl_device_id device) {
|
||||
|
@ -685,7 +685,7 @@ cl_int clEnqueueFillImage(cl_command_queue command_queue, cl_mem image, const vo
|
|||
|
||||
#endif
|
||||
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
|
||||
// clCreateCommandQueueWithProperties wrapper, use OpenCLWrapper function.
|
||||
cl_command_queue clCreateCommandQueueWithProperties(cl_context context, cl_device_id device,
|
||||
|
|
|
@ -110,7 +110,7 @@ using clEnqueueCopyBufferToImageFunc = cl_int(CL_API_CALL *)(cl_command_queue, c
|
|||
using clEnqueueCopyImageToBufferFunc = cl_int(CL_API_CALL *)(cl_command_queue, cl_mem, cl_mem, const size_t *,
|
||||
const size_t *, size_t, cl_uint, const cl_event *,
|
||||
cl_event *);
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120
|
||||
using clRetainDeviceFunc = cl_int (*)(cl_device_id);
|
||||
using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
|
||||
using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_format *, const cl_image_desc *, void *,
|
||||
|
@ -118,7 +118,7 @@ using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_fo
|
|||
using clEnqueueFillImageFunc = cl_int (*)(cl_command_queue, cl_mem, const void *, const size_t *, const size_t *,
|
||||
cl_uint, const cl_event *, cl_event *);
|
||||
#endif
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
using clCreateProgramWithILFunc = cl_program (*)(cl_context, const void *, size_t, cl_int *);
|
||||
using clSVMAllocFunc = void *(*)(cl_context, cl_mem_flags, size_t size, cl_uint);
|
||||
using clSVMFreeFunc = void (*)(cl_context, void *);
|
||||
|
@ -185,13 +185,13 @@ CL_DECLARE_FUNC_PTR(clGetEventProfilingInfo);
|
|||
CL_DECLARE_FUNC_PTR(clGetImageInfo);
|
||||
CL_DECLARE_FUNC_PTR(clEnqueueCopyBufferToImage);
|
||||
CL_DECLARE_FUNC_PTR(clEnqueueCopyImageToBuffer);
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120
|
||||
CL_DECLARE_FUNC_PTR(clRetainDevice);
|
||||
CL_DECLARE_FUNC_PTR(clReleaseDevice);
|
||||
CL_DECLARE_FUNC_PTR(clCreateImage);
|
||||
CL_DECLARE_FUNC_PTR(clEnqueueFillImage);
|
||||
#endif
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
CL_DECLARE_FUNC_PTR(clGetKernelSubGroupInfoKHR);
|
||||
CL_DECLARE_FUNC_PTR(clCreateCommandQueueWithProperties);
|
||||
CL_DECLARE_FUNC_PTR(clGetExtensionFunctionAddress);
|
||||
|
|
Loading…
Reference in New Issue