!11276 【MS】【LITE】【GPU】 Reduce OpenCL .so size by 0.5M

From: @wangdongxu6
Reviewed-by: @ddwsky
Signed-off-by: @ddwsky
This commit is contained in:
mindspore-ci-bot 2021-01-15 14:04:16 +08:00 committed by Gitee
commit 8e9086bbe6
7 changed files with 160 additions and 154 deletions

View File

@ -1,9 +1,9 @@
cmake_minimum_required(VERSION 3.14)
project (Lite)
project(Lite)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0)
message(FATAL_ERROR "GCC vesion ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0")
endif ()
endif()
option(MS_VERSION_MAJOR "major version" 0)
option(MS_VERSION_MINOR "minor version" 7)
@ -28,12 +28,14 @@ set(DIR_PREFIX mindspore-lite)
set(MS_VERSION ${MS_VERSION_MAJOR}.${MS_VERSION_MINOR}.${MS_VERSION_REVISION})
set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} \
-DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} \
-DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full")
set(BUILD_LITE "on")
set(PLATFORM_ARM "off")
if (PLATFORM_ARM64 OR PLATFORM_ARM32)
if(PLATFORM_ARM64 OR PLATFORM_ARM32)
set(PLATFORM_ARM "on")
# settings for the cross-compiling toolchain
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
@ -41,19 +43,19 @@ if (PLATFORM_ARM64 OR PLATFORM_ARM32)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
endif()
if (SUPPORT_GPU)
if(SUPPORT_GPU)
set(PROCESS_UNIT gpu)
elseif (SUPPORT_NPU)
elseif(SUPPORT_NPU)
set(PROCESS_UNIT npu)
else ()
else()
set(PROCESS_UNIT cpu)
endif ()
endif()
if (SUPPORT_NPU)
if(SUPPORT_NPU)
set(DDK_PATH "$ENV{HWHIAI_DDK}/ddk/ai_ddk_lib")
if (PLATFORM_ARM64)
if(PLATFORM_ARM64)
set(DDK_LIB_PATH ${DDK_PATH}/lib64)
elseif (PLATFORM_ARM32)
elseif(PLATFORM_ARM32)
set(DDK_LIB_PATH ${DDK_PATH}/lib)
endif()
add_compile_definitions(SUPPORT_NPU)
@ -62,39 +64,39 @@ endif()
add_compile_definitions(NO_DLIB)
add_compile_options(-fPIC)
if (SUPPORT_TRAIN)
if (PLATFORM_ARM64)
if(SUPPORT_TRAIN)
if(PLATFORM_ARM64)
set(RUNTIME_COMPONENT_NAME train-android-aarch64)
elseif (PLATFORM_ARM32)
elseif(PLATFORM_ARM32)
set(RUNTIME_COMPONENT_NAME train-android-aarch32)
elseif (WIN32)
elseif(WIN32)
set(RUNTIME_COMPONENT_NAME train-win-x64)
set(CONVERTER_COMPONENT_NAME train-converter-win-x64)
else ()
else()
set(RUNTIME_COMPONENT_NAME train-linux-x64)
set(CONVERTER_COMPONENT_NAME train-converter-linux-x64)
endif ()
else ()
if (PLATFORM_ARM64)
endif()
else()
if(PLATFORM_ARM64)
set(RUNTIME_COMPONENT_NAME inference-android-aarch64)
elseif (PLATFORM_ARM32)
elseif(PLATFORM_ARM32)
set(RUNTIME_COMPONENT_NAME inference-android-aarch32)
elseif (WIN32)
if ("${X86_64_SIMD}" STREQUAL "off")
elseif(WIN32)
if("${X86_64_SIMD}" STREQUAL "off")
set(RUNTIME_COMPONENT_NAME inference-win-x64)
else ()
else()
set(RUNTIME_COMPONENT_NAME inference-win-x64-${X86_64_SIMD})
endif()
set(CONVERTER_COMPONENT_NAME converter-win-x64)
else ()
if ("${X86_64_SIMD}" STREQUAL "off")
else()
if("${X86_64_SIMD}" STREQUAL "off")
set(RUNTIME_COMPONENT_NAME inference-linux-x64)
else ()
else()
set(RUNTIME_COMPONENT_NAME inference-linux-x64-${X86_64_SIMD})
endif()
set(CONVERTER_COMPONENT_NAME converter-linux-x64)
endif()
endif ()
endif()
string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(CORE_DIR ${TOP_DIR}/mindspore/core)
@ -112,140 +114,143 @@ include(${TOP_DIR}/cmake/utils.cmake)
include(${TOP_DIR}/cmake/dependency_utils.cmake)
include(${TOP_DIR}/cmake/dependency_securec.cmake)
include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake)
if (SUPPORT_GPU)
if(SUPPORT_GPU)
include(${TOP_DIR}/cmake/external_libs/opencl.cmake)
endif()
if (ENABLE_CONVERTER OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
if(ENABLE_CONVERTER OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
include(${TOP_DIR}/cmake/external_libs/json.cmake)
endif()
file(GLOB FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/*.fbs)
ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_src ${CMAKE_BINARY_DIR}/schema "")
ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_inner_src ${CMAKE_BINARY_DIR}/schema/inner "inner")
ms_build_flatbuffers_lite(FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/schema/ fbs_inner_src ${CMAKE_BINARY_DIR}/schema/inner
"inner")
string(REPLACE "-g" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REPLACE "-g" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default")
else ()
else()
## release builds: compile/link flags enabled for the binscope check
set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}")
if (NOT WIN32)
set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \
-Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \
-Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}")
if(NOT WIN32)
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
endif ()
endif ()
if (ENABLE_VERBOSE)
endif()
endif()
if(ENABLE_VERBOSE)
set(CMAKE_VERBOSE_MAKEFILE on)
endif ()
if (SUPPORT_TRAIN)
endif()
if(SUPPORT_TRAIN)
add_compile_definitions(SUPPORT_TRAIN)
endif ()
if (ENABLE_NEON)
endif()
if(ENABLE_NEON)
add_compile_definitions(ENABLE_NEON)
endif ()
if (ENABLE_FP16)
endif()
if(ENABLE_FP16)
add_compile_definitions(ENABLE_FP16)
endif ()
if (SUPPORT_GPU)
endif()
if(SUPPORT_GPU)
gene_opencl(${CMAKE_CURRENT_SOURCE_DIR})
add_definitions(-DUSE_OPENCL_WRAPPER)
add_definitions(-DMS_OPENCL_PROFILE=false)
add_definitions(-DCL_TARGET_OPENCL_VERSION=200)
add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=200)
add_definitions(-DCL_HPP_MINIMUM_OPENCL_VERSION=110)
add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=120)
add_definitions(-DCL_HPP_MINIMUM_OPENCL_VERSION=120)
add_compile_definitions(SUPPORT_GPU)
if (OFFLINE_COMPILE)
if(OFFLINE_COMPILE)
add_compile_definitions(PROGRAM_WITH_IL)
endif ()
endif()
include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-headers-src/)
include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-clhpp-src/include)
endif ()
endif()
if (WIN32)
if(WIN32)
add_compile_definitions(LITE_EXPORTS)
add_compile_definitions(BUILDING_DLL)
endif ()
endif()
if (ENABLE_CONVERTER)
if (PLATFORM_ARM)
if(ENABLE_CONVERTER)
if(PLATFORM_ARM)
MESSAGE(FATAL_ERROR "Cannot build converter in arm platform")
endif ()
endif()
include_directories(${PYTHON_INCLUDE_DIRS})
include(${TOP_DIR}/cmake/external_libs/eigen.cmake)
include(${TOP_DIR}/cmake/external_libs/protobuf.cmake)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter)
endif ()
endif()
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
if (NOT DEFINED ENV{ANDROID_NDK})
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
if(NOT DEFINED ENV{ANDROID_NDK})
message(FATAL_ERROR "env ANDROID_NDK should be setted for ARM compile")
endif ()
endif()
add_compile_definitions(ENABLE_ARM)
endif ()
if (PLATFORM_ARM32)
endif()
if(PLATFORM_ARM32)
add_definitions(-mfloat-abi=softfp -mfpu=neon)
add_compile_definitions(ENABLE_ARM32)
endif ()
if (PLATFORM_ARM64)
endif()
if(PLATFORM_ARM64)
add_compile_definitions(ENABLE_ARM64)
if (ENABLE_FP16)
if(ENABLE_FP16)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
endif ()
endif ()
endif()
endif()
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
if (ENABLE_CONVERTER)
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
if(ENABLE_CONVERTER)
set(BUILD_MINDDATA "off")
endif()
endif()
if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
if ("${X86_64_SIMD}" STREQUAL "sse")
if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
if("${X86_64_SIMD}" STREQUAL "sse")
add_compile_definitions(ENABLE_SSE)
endif ()
if ("${X86_64_SIMD}" STREQUAL "avx")
endif()
if("${X86_64_SIMD}" STREQUAL "avx")
add_compile_definitions(ENABLE_SSE)
add_compile_definitions(ENABLE_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx -mfma")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx -mfma")
endif ()
endif ()
endif()
endif()
if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
if(BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
add_compile_definitions(ENABLE_ANDROID)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
endif ()
endif()
if (BUILD_MINDDATA STREQUAL "lite_cv")
if(BUILD_MINDDATA STREQUAL "lite_cv")
add_compile_definitions(ENABLE_ANDROID)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
endif ()
endif()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl)
if (ENABLE_TOOLS)
if(ENABLE_TOOLS)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
if (SUPPORT_TRAIN)
if(SUPPORT_TRAIN)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark_train)
endif ()
endif ()
if (NOT WIN32)
if (ENABLE_TOOLS)
if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
endif()
endif()
if(NOT WIN32)
if(ENABLE_TOOLS)
if(NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/schema_gen)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/cropper)
endif ()
endif ()
if (BUILD_TESTCASES)
endif()
endif()
if(BUILD_TESTCASES)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
endif ()
endif ()
endif()
endif()
include(${TOP_DIR}/cmake/package_lite.cmake)

View File

@ -1,23 +1,20 @@
add_compile_definitions(USE_ANDROID_LOG)
if (ENABLE_V0)
add_definitions(-DENABLE_V0)
if(ENABLE_V0)
add_definitions(-DENABLE_V0)
endif()
set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
include_directories(${LITE_DIR}/nnacl/)
include_directories(${LITE_DIR}/nnacl/optimize)
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
# extra optimization flags for performance (applied to Release builds only)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if (SUPPORT_GPU)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti")
else ()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
endif ()
endif ()
endif ()
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
-fdata-sections -ffast-math -fno-rtti -fno-exceptions")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
-fdata-sections -ffast-math -fno-rtti -fno-exceptions")
endif()
endif()
set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
@ -42,7 +39,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/dequant.cc
)
if (SUPPORT_GPU)
if(SUPPORT_GPU)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_kernel.cc
@ -54,10 +51,10 @@ if (SUPPORT_GPU)
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_runtime.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_wrapper.cc
)
endif ()
endif()
if (SUPPORT_TRAIN)
if(SUPPORT_TRAIN)
set(ANF_SRC
${ANF_SRC}
)
@ -70,7 +67,7 @@ if (SUPPORT_TRAIN)
${CMAKE_CURRENT_SOURCE_DIR}/train/train_model.cc
${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
)
endif ()
endif()
add_subdirectory(ops)
add_subdirectory(runtime/kernel/arm)
@ -85,53 +82,54 @@ set_target_properties(mindspore-lite_static PROPERTIES OUTPUT_NAME "mindspore-li
set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
if (SUPPORT_GPU)
if(SUPPORT_GPU)
add_subdirectory(runtime/kernel/opencl)
target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl cpu_ops_mid)
target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl_mid cpu_ops_mid)
else ()
else()
target_link_libraries(mindspore-lite cpu_kernel_mid nnacl cpu_ops_mid)
target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl_mid cpu_ops_mid)
endif ()
if (SUPPORT_NPU)
endif()
if(SUPPORT_NPU)
add_subdirectory(runtime/agent/npu)
include_directories(${DDK_PATH})
target_link_libraries(mindspore-lite npu_kernel_mid)
target_link_libraries(mindspore-lite_static npu_kernel_mid)
endif ()
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
endif()
if(PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(mindspore-lite log)
target_link_libraries(mindspore-lite_static log)
endif ()
if (BUILD_MINDDATA STREQUAL "lite")
endif()
if(BUILD_MINDDATA STREQUAL "lite")
target_link_libraries(mindspore-lite minddata_eager_mid minddata-lite)
target_link_libraries(mindspore-lite_static minddata_eager_mid)
endif ()
endif()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
if(PLATFORM_ARM)
set(NDK_STRIP
"${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip")
endif()
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
endif ()
endif()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if (PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if(PLATFORM_ARM)
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
elseif (NOT WIN32)
add_custom_command(TARGET mindspore-lite POST_BUILD
COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
endif ()
endif ()
elseif(NOT WIN32)
add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite.so)
endif()
endif()
########################## build optimize and float16 library #################################3
if (PLATFORM_ARM64)
########################## build optimize and float16 library #################################
if(PLATFORM_ARM64)
target_link_libraries(mindspore-lite cpu_opt_kernel_mid nnacl_optimize_mid)
target_link_libraries(mindspore-lite_static cpu_opt_kernel_mid nnacl_optimize_mid)
if (ENABLE_FP16)
if(ENABLE_FP16)
target_link_libraries(mindspore-lite cpu_fp16_kernel_mid nnacl_fp16_mid)
target_link_libraries(mindspore-lite_static cpu_fp16_kernel_mid nnacl_fp16_mid)
endif ()
endif ()
endif()
endif()

View File

@ -94,11 +94,14 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img
MS_ASSERT(buffer);
MS_ASSERT(image);
MS_ASSERT(img_size.size() == 3);
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
if (data == nullptr) {
*image = new (std::nothrow)
cl::Image2D(*ocl_runtime_->Context(), image_format, **buffer, img_size[0], img_size[1], 0, &ret);
// copied from cl2.hpp: create the image via clCreateImage directly
cl_image_desc desc = {CL_MEM_OBJECT_IMAGE2D, img_size[0], img_size[1], 0, 0, 0, 0, 0, 0, (**buffer).get()};
const cl::Context &context = *ocl_runtime_->Context();
cl_image_format image_format{CL_RGBA, static_cast<uint32_t>(img_size[2])};
*image = new (std::nothrow) cl::Image2D(clCreateImage(context.get(), 0, &image_format, &desc, nullptr, &ret));
} else {
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
*image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
image_format, img_size[0], img_size[1], 0, data, &ret);
}

View File

@ -562,7 +562,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
if (command_queue == nullptr) {
command_queue = default_command_queue_;
}
return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr);
}
void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
@ -591,7 +591,7 @@ int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue)
if (command_queue == nullptr) {
command_queue = default_command_queue_;
}
return command_queue->enqueueUnmapSVM(host_ptr);
return clEnqueueSVMUnmap(command_queue->get(), host_ptr, 0, nullptr, nullptr);
}
bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {

View File

@ -83,7 +83,7 @@ class OpenCLRuntime {
auto svm_capabilities = GetSVMCapabilities();
if (svm_capabilities) {
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] SVM pointer " << value;
return kernel.setArg(index, value);
return clSetKernelArgSVMPointer(kernel.get(), index, value);
}
cl::Buffer *buffer = reinterpret_cast<cl::Buffer *>(allocator_->GetBuffer(value));
MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value;

View File

@ -142,13 +142,13 @@ bool LoadLibraryFromPath(const std::string &library_path, void **handle_ptr) {
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyBufferToImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
LOAD_OPENCL_FUNCTION_PTR(clRetainDevice);
LOAD_OPENCL_FUNCTION_PTR(clReleaseDevice);
LOAD_OPENCL_FUNCTION_PTR(clCreateImage);
LOAD_OPENCL_FUNCTION_PTR(clEnqueueFillImage);
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
LOAD_OPENCL_FUNCTION_PTR(clCreateCommandQueueWithProperties);
LOAD_OPENCL_FUNCTION_PTR(clGetExtensionFunctionAddress);
LOAD_OPENCL_FUNCTION_PTR(clSVMAlloc);
@ -232,13 +232,13 @@ CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo);
CL_DEFINE_FUNC_PTR(clGetImageInfo);
CL_DEFINE_FUNC_PTR(clEnqueueCopyBufferToImage);
CL_DEFINE_FUNC_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
CL_DEFINE_FUNC_PTR(clRetainDevice);
CL_DEFINE_FUNC_PTR(clReleaseDevice);
CL_DEFINE_FUNC_PTR(clCreateImage);
CL_DEFINE_FUNC_PTR(clEnqueueFillImage);
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
CL_DEFINE_FUNC_PTR(clGetKernelSubGroupInfoKHR);
CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties);
CL_DEFINE_FUNC_PTR(clGetExtensionFunctionAddress);
@ -651,7 +651,7 @@ cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue, cl_mem src_ima
event_wait_list, event);
}
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
// clRetainDevice wrapper, use OpenCLWrapper function.
cl_int clRetainDevice(cl_device_id device) {
@ -685,7 +685,7 @@ cl_int clEnqueueFillImage(cl_command_queue command_queue, cl_mem image, const vo
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
// clCreateCommandQueueWithProperties wrapper, use OpenCLWrapper function.
cl_command_queue clCreateCommandQueueWithProperties(cl_context context, cl_device_id device,

View File

@ -110,7 +110,7 @@ using clEnqueueCopyBufferToImageFunc = cl_int(CL_API_CALL *)(cl_command_queue, c
using clEnqueueCopyImageToBufferFunc = cl_int(CL_API_CALL *)(cl_command_queue, cl_mem, cl_mem, const size_t *,
const size_t *, size_t, cl_uint, const cl_event *,
cl_event *);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
using clRetainDeviceFunc = cl_int (*)(cl_device_id);
using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_format *, const cl_image_desc *, void *,
@ -118,7 +118,7 @@ using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_fo
using clEnqueueFillImageFunc = cl_int (*)(cl_command_queue, cl_mem, const void *, const size_t *, const size_t *,
cl_uint, const cl_event *, cl_event *);
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
using clCreateProgramWithILFunc = cl_program (*)(cl_context, const void *, size_t, cl_int *);
using clSVMAllocFunc = void *(*)(cl_context, cl_mem_flags, size_t size, cl_uint);
using clSVMFreeFunc = void (*)(cl_context, void *);
@ -185,13 +185,13 @@ CL_DECLARE_FUNC_PTR(clGetEventProfilingInfo);
CL_DECLARE_FUNC_PTR(clGetImageInfo);
CL_DECLARE_FUNC_PTR(clEnqueueCopyBufferToImage);
CL_DECLARE_FUNC_PTR(clEnqueueCopyImageToBuffer);
#if CL_HPP_TARGET_OPENCL_VERSION >= 120
#if CL_TARGET_OPENCL_VERSION >= 120
CL_DECLARE_FUNC_PTR(clRetainDevice);
CL_DECLARE_FUNC_PTR(clReleaseDevice);
CL_DECLARE_FUNC_PTR(clCreateImage);
CL_DECLARE_FUNC_PTR(clEnqueueFillImage);
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if CL_TARGET_OPENCL_VERSION >= 200
CL_DECLARE_FUNC_PTR(clGetKernelSubGroupInfoKHR);
CL_DECLARE_FUNC_PTR(clCreateCommandQueueWithProperties);
CL_DECLARE_FUNC_PTR(clGetExtensionFunctionAddress);