From fa8612922878120309bfb5c8395ac6f7e8c220c5 Mon Sep 17 00:00:00 2001
From: wandongdong
Date: Tue, 5 Jan 2021 20:15:07 -0800
Subject: [PATCH] add cuda backend for lite

---
 build.sh | 15 +--
 cmake/external_libs/vulkan.cmake | 41 +++++++
 mindspore/lite/CMakeLists.txt | 22 +++-
 mindspore/lite/src/CMakeLists.txt | 40 +++++--
 mindspore/lite/src/lite_session.cc | 25 +++-
 mindspore/lite/src/lite_session.h | 10 +-
 .../lite/src/runtime/gpu/cuda/cuda_device.cc | 21 ++++
 .../lite/src/runtime/gpu/cuda/cuda_device.h | 32 ++++++
 .../lite/src/runtime/gpu/cuda/cuda_runtime.cc | 80 +++++++++++++
 .../lite/src/runtime/gpu/cuda/cuda_runtime.h | 59 ++++++++++
 mindspore/lite/src/runtime/gpu/gpu_runtime.cc | 32 ++++++
 mindspore/lite/src/runtime/gpu/gpu_runtime.h | 107 ++++++++++++++++++
 .../{ => gpu}/opencl/opencl_allocator.cc | 4 +-
 .../{ => gpu}/opencl/opencl_allocator.h | 0
 .../{ => gpu}/opencl/opencl_executor.cc | 6 +-
 .../{ => gpu}/opencl/opencl_executor.h | 6 +-
 .../{ => gpu}/opencl/opencl_runtime.cc | 25 ++--
 .../runtime/{ => gpu}/opencl/opencl_runtime.h | 24 ++--
 .../{ => gpu}/opencl/opencl_wrapper.cc | 2 +-
 .../runtime/{ => gpu}/opencl/opencl_wrapper.h | 32 +++---
 .../src/runtime/kernel/cuda/CMakeLists.txt | 6 +
 .../src/runtime/kernel/cuda/cuda_kernel.cc | 22 ++++
 .../src/runtime/kernel/cuda/cuda_kernel.h | 33 ++++++
 .../src/runtime/kernel/cuda/cuda_subgraph.cc | 26 +++++
 .../src/runtime/kernel/cuda/cuda_subgraph.h | 26 +++++
 .../src/runtime/kernel/opencl/CMakeLists.txt | 11 +-
 .../runtime/kernel/opencl/opencl_fusion.cc | 2 +-
 .../src/runtime/kernel/opencl/opencl_kernel.h | 2 +-
 .../runtime/kernel/opencl/opencl_subgraph.cc | 2 +-
 .../runtime/kernel/opencl/opencl_subgraph.h | 4 +-
 .../lite/src/runtime/opencl/CMakeLists.txt | 11 --
 mindspore/lite/src/scheduler.cc | 8 +-
 mindspore/lite/test/CMakeLists.txt | 37 ++++--
 .../src/runtime/kernel/opencl/cast_tests.cc | 1 -
 .../src/runtime/kernel/opencl/fill_tests.cc | 1 -
 .../runtime/kernel/opencl/to_format_tests.cc | 1 -
 36 files changed, 665 insertions(+), 111 deletions(-)
 create mode 100644 cmake/external_libs/vulkan.cmake
 create mode 100644 mindspore/lite/src/runtime/gpu/cuda/cuda_device.cc
 create mode 100644 mindspore/lite/src/runtime/gpu/cuda/cuda_device.h
 create mode 100644 mindspore/lite/src/runtime/gpu/cuda/cuda_runtime.cc
 create mode 100644 mindspore/lite/src/runtime/gpu/cuda/cuda_runtime.h
 create mode 100644 mindspore/lite/src/runtime/gpu/gpu_runtime.cc
 create mode 100644 mindspore/lite/src/runtime/gpu/gpu_runtime.h
 rename mindspore/lite/src/runtime/{ => gpu}/opencl/opencl_allocator.cc (99%)
 rename mindspore/lite/src/runtime/{ => gpu}/opencl/opencl_allocator.h (100%)
 rename mindspore/lite/src/runtime/{ => gpu}/opencl/opencl_executor.cc (92%)
 rename mindspore/lite/src/runtime/{ => gpu}/opencl/opencl_executor.h (87%)
 rename mindspore/lite/src/runtime/{ => gpu}/opencl/opencl_runtime.cc (97%)
 rename mindspore/lite/src/runtime/{ => gpu}/opencl/opencl_runtime.h (91%)
 rename mindspore/lite/src/runtime/{ => gpu}/opencl/opencl_wrapper.cc (99%)
 rename mindspore/lite/src/runtime/{ => gpu}/opencl/opencl_wrapper.h (86%)
 create mode 100644 mindspore/lite/src/runtime/kernel/cuda/CMakeLists.txt
 create mode 100644 mindspore/lite/src/runtime/kernel/cuda/cuda_kernel.cc
 create mode 100644 mindspore/lite/src/runtime/kernel/cuda/cuda_kernel.h
 create mode 100644 mindspore/lite/src/runtime/kernel/cuda/cuda_subgraph.cc
 create mode 100644 mindspore/lite/src/runtime/kernel/cuda/cuda_subgraph.h
 delete mode 100644 mindspore/lite/src/runtime/opencl/CMakeLists.txt
diff --git
a/build.sh b/build.sh index 5c9f9180de7..545aa966bb3 100755 --- a/build.sh +++ b/build.sh @@ -341,6 +341,7 @@ checkopts() # Parse device # Process build option if [[ "X$DEVICE" == "Xgpu" ]]; then + LITE_ENABLE_GPU="opencl" ENABLE_GPU="on" ENABLE_CPU="on" ENABLE_MPI="on" @@ -378,6 +379,12 @@ checkopts() ENABLE_CPU="on" elif [[ "X$DEVICE" == "Xcpu" ]]; then ENABLE_CPU="on" + elif [[ "X$DEVICE" == "Xopencl" ]]; then + LITE_ENABLE_GPU="opencl" + elif [[ "X$DEVICE" == "Xvulkan" ]]; then + LITE_ENABLE_GPU="vulkan" + elif [[ "X$DEVICE" == "Xcuda" ]]; then + LITE_ENABLE_GPU="cuda" elif [[ "X$DEVICE" == "X" ]]; then : else @@ -520,18 +527,12 @@ build_lite() get_version echo "============ Start building MindSpore Lite ${VERSION_STR} ============" - LITE_ENABLE_GPU=${ENABLE_GPU} LITE_ENABLE_NPU=${ENABLE_NPU} if [[ "${DEVICE}" == "" && "${LITE_PLATFORM}" == "arm64" ]]; then - LITE_ENABLE_GPU="on" + LITE_ENABLE_GPU="opencl" LITE_ENABLE_NPU="on" fi - if [[ $1 == "arm64" && "X$DEVICE" != "Xcpu" ]]; then - LITE_ENABLE_GPU="on" - echo "start get opencl" - fi - if [ "${LITE_ENABLE_NPU}" == "on" ]; then if [ "${LITE_PLATFORM}" == "arm64" ]; then checkddk diff --git a/cmake/external_libs/vulkan.cmake b/cmake/external_libs/vulkan.cmake new file mode 100644 index 00000000000..bcd4266a477 --- /dev/null +++ b/cmake/external_libs/vulkan.cmake @@ -0,0 +1,41 @@ +if(ENABLE_GITEE) + set(REQ_URL "https://gitee.com/mirrors/Vulkan-Headers/archive/v1.2.166.zip") + set(MD5 "8797a525aff953ea536ebe338a9f5ef6") + set(PKG_GIT_TAG "") + __download_pkg_with_git(Vulkan-Headers ${REQ_URL} ${PKG_GIT_TAG} ${MD5}) +else() + set(REQ_URL "https://github.com/KhronosGroup/Vulkan-Headers/archive/v1.2.166.zip") + set(MD5 "91eae880a0ad9ad77c89d79b95b7399a") + __download_pkg(Vulkan-Headers ${REQ_URL} ${MD5}) +endif() + +function(gene_spirv BASEPATH) + string(CONCAT CL_SRC_DIR "${BASEPATH}" "/src/runtime/kernel/vulkan/glsl") + message(STATUS "**********gene spirv*********base path: " "${BASEPATH}" ", glsl path: " "${CL_SRC_DIR}") + if(NOT EXISTS ${CL_SRC_DIR}) + return() + endif() + file(GLOB_RECURSE CL_LIST ${CL_SRC_DIR}/*.cl) + foreach(file_path ${CL_LIST}) + file(REMOVE ${file_path}.inc) + string(REGEX REPLACE ".+/(.+)\\..*" "\\1" kernel_name "${file_path}") + set(inc_file_ex "${kernel_name}.cl.inc") + execute_process( + COMMAND bash -c "sed 's/\\\\/\\\\\\\\/g' " + COMMAND bash -c "sed 's/\\\"/\\\\\\\"/g' " + COMMAND bash -c "sed 's/$/\\\\n\\\" \\\\/' " + COMMAND bash -c "sed 's/^/\\\"/' " + WORKING_DIRECTORY ${CL_SRC_DIR} + INPUT_FILE ${file_path} + OUTPUT_FILE ${inc_file_ex} + RESULT_VARIABLE RESULT) + if(NOT RESULT EQUAL "0") + message(FATAL_ERROR "error! 
when generate ${inc_file_ex}") + endif() + __exec_cmd(COMMAND sed -i + "1i\\static const char *${kernel_name}_source =\\\"\\\\n\\\" \\\\" + ${inc_file_ex} WORKING_DIRECTORY ${CL_SRC_DIR} + ) + __exec_cmd(COMMAND sed -i "$a\\\\\;" ${inc_file_ex} WORKING_DIRECTORY ${CL_SRC_DIR}) + endforeach() +endfunction() diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index aafaf77944e..3aada4bf43a 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -17,6 +17,9 @@ option(ENABLE_FP16 "if build fp16 ops" off) option(ENABLE_TOOLS "if build tools" on) option(BUILD_TESTCASES "if build testcase" on) option(SUPPORT_GPU "if support gpu" off) +option(GPU_OPENCL "if support gpu opencl" off) +option(GPU_VULKAN "if support gpu vulkan" off) +option(GPU_CUDA "if support gpu cuda" off) option(SUPPORT_NPU "if support npu" off) option(OFFLINE_COMPILE "if offline compile OpenCL kernel" off) option(BUILD_MINDDATA_EXAMPLE "" on) @@ -43,6 +46,7 @@ if(PLATFORM_ARM64 OR PLATFORM_ARM32) set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) endif() +#if(BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") if(SUPPORT_GPU) set(PROCESS_UNIT gpu) elseif(SUPPORT_NPU) @@ -114,9 +118,12 @@ include(${TOP_DIR}/cmake/utils.cmake) include(${TOP_DIR}/cmake/dependency_utils.cmake) include(${TOP_DIR}/cmake/dependency_securec.cmake) include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake) -if(SUPPORT_GPU) +if(SUPPORT_GPU STREQUAL opencl) include(${TOP_DIR}/cmake/external_libs/opencl.cmake) endif() +if(SUPPORT_GPU STREQUAL vulkan) + include(${TOP_DIR}/cmake/external_libs/vulkan.cmake) +endif() if(ENABLE_CONVERTER OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") include(${TOP_DIR}/cmake/external_libs/json.cmake) @@ -157,7 +164,8 @@ endif() if(ENABLE_FP16) add_compile_definitions(ENABLE_FP16) endif() -if(SUPPORT_GPU) +if(SUPPORT_GPU STREQUAL opencl) + add_definitions(-DGPU_OPENCL) gene_opencl(${CMAKE_CURRENT_SOURCE_DIR}) add_definitions(-DUSE_OPENCL_WRAPPER) add_definitions(-DMS_OPENCL_PROFILE=false) @@ -171,6 +179,16 @@ if(SUPPORT_GPU) include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-headers-src/) include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-clhpp-src/include) endif() +if(SUPPORT_GPU STREQUAL vulkan) + add_definitions(-DGPU_VULKAN) + add_definitions(-DVK_NO_PROTOTYPES) + add_compile_definitions(SUPPORT_GPU) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/build/_deps/vulkan-headers-src/include) +endif() +if(SUPPORT_GPU STREQUAL cuda) + add_definitions(-DGPU_CUDA) + add_compile_definitions(SUPPORT_GPU) +endif() if(WIN32) add_compile_definitions(LITE_EXPORTS) diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index d610f0df093..ec554a5bce1 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -40,21 +40,37 @@ set(LITE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc ) -if(SUPPORT_GPU) +if(SUPPORT_GPU STREQUAL opencl) + file(GLOB_RECURSE OPENCL_RUNTIME_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/gpu/opencl/*.cc + ) set(LITE_SRC ${LITE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_kernel.cc - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_subgraph.cc - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/opencl_fusion.cc - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/kernel/opencl/utils.cc - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_executor.cc - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_allocator.cc - 
${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_runtime.cc - ${CMAKE_CURRENT_SOURCE_DIR}/runtime/opencl/opencl_wrapper.cc + ${OPENCL_RUNTIME_SRC} ) endif() +if(SUPPORT_GPU STREQUAL vulkan) + file(GLOB VULKAN_RUNTIME_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/gpu/*.cc + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/gpu/vulkan/*.cc + ) + set(LITE_SRC + ${LITE_SRC} + ${VULKAN_RUNTIME_SRC} + ) +endif() +if(SUPPORT_GPU STREQUAL cuda) + file(GLOB CUDA_RUNTIME_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/gpu/*.cc + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/gpu/cuda/*.cc + ) + set(LITE_SRC + ${LITE_SRC} + ${CUDA_RUNTIME_SRC} + ) +endif() if(SUPPORT_TRAIN) set(ANF_SRC ${ANF_SRC} @@ -86,10 +102,14 @@ set_target_properties(mindspore-lite_static PROPERTIES OUTPUT_NAME "mindspore-li set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field") -if(SUPPORT_GPU) +if(SUPPORT_GPU STREQUAL opencl) add_subdirectory(runtime/kernel/opencl) target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl cpu_ops_mid) target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl_mid cpu_ops_mid) +elseif(SUPPORT_GPU STREQUAL cuda) + add_subdirectory(runtime/kernel/cuda) + target_link_libraries(mindspore-lite cpu_kernel_mid cuda_kernel_mid nnacl cpu_ops_mid) + target_link_libraries(mindspore-lite_static cpu_kernel_mid cuda_kernel_mid nnacl_mid cpu_ops_mid) else() target_link_libraries(mindspore-lite cpu_kernel_mid nnacl cpu_ops_mid) target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl_mid cpu_ops_mid) diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index 7d04b804be4..1dc8134bc3c 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -32,7 +32,7 @@ #include "src/runtime/agent/npu/npu_manager.h" #include "src/runtime/agent/npu/optimizer/npu_pass_manager.h" #endif -#if SUPPORT_GPU +#if GPU_OPENCL #include "src/runtime/kernel/opencl/opencl_subgraph.h" #endif @@ -562,7 +562,7 @@ LiteSession::~LiteSession() { mindspore::lite::NPUPassManager::GetInstance()->Clear(); mindspore::lite::NPUManager::GetInstance()->Reset(); #endif -#if SUPPORT_GPU && !SUPPORT_TRAIN +#if GPU_OPENCL && !SUPPORT_TRAIN delete opencl_runtime_wrapper_; #endif delete (model_); @@ -646,7 +646,7 @@ int LiteSession::ReSizeKernels(const std::vector &kernels) } auto ret = RET_OK; if (kernel->subgraph_type() == kernel::kGpuSubGraph) { -#if SUPPORT_GPU +#if GPU_OPENCL auto sub_graph = reinterpret_cast(kernel); ret = sub_graph->ReSize(false); #endif @@ -700,7 +700,7 @@ int LiteSession::Resize(const std::vector &inputs } int LiteSession::InitGPURuntime() { -#if SUPPORT_GPU && !SUPPORT_TRAIN +#if GPU_OPENCL && !SUPPORT_TRAIN if (this->context_->IsGpuEnabled()) { opencl_runtime_wrapper_ = new (std::nothrow) opencl::OpenCLRuntimeWrapper(); if (opencl_runtime_wrapper_ == nullptr) { @@ -717,6 +717,23 @@ int LiteSession::InitGPURuntime() { MS_LOG(INFO) << "Init OpenCL runtime success."; } } +#elif GPU_VULKAN && !SUPPORT_TRAIN + if (this->context_->IsGpuEnabled()) { + auto gpu_device_info = this->context_->GetGpuInfo(); + vk_runtime_wrap_ = new (std::nothrow) gpu::GpuRuntimeWrapper; + if (vk_runtime_wrap_ == nullptr) { + MS_LOG(ERROR) << "create vk_runtime failed"; + return RET_ERROR; + } + auto vk_runtime = vk_runtime_wrap_->GetInstance(); + vk_runtime->SetFp16Enable(gpu_device_info.enable_float16_); + if (vk_runtime->Init() != RET_OK) { + this->context_->device_list_ = {{DT_CPU, 
{gpu_device_info.enable_float16_, MID_CPU}}}; + MS_LOG(WARNING) << "Init Vulkan runtime failed, change to CPU mode."; + } else { + MS_LOG(INFO) << "Init Vulkan runtime success."; + } + } #endif return RET_OK; } diff --git a/mindspore/lite/src/lite_session.h b/mindspore/lite/src/lite_session.h index d1963d019f6..181a6a93784 100644 --- a/mindspore/lite/src/lite_session.h +++ b/mindspore/lite/src/lite_session.h @@ -31,8 +31,10 @@ #include "src/executor.h" #include "src/tensor.h" #include "src/tensorlist.h" -#if SUPPORT_GPU -#include "src/runtime/opencl/opencl_runtime.h" +#if GPU_OPENCL +#include "src/runtime/gpu/opencl/opencl_runtime.h" +#elif GPU_VULKAN +#include "src/runtime/gpu/vulkan/vulkan_runtime.h" #endif namespace mindspore { @@ -127,8 +129,10 @@ class LiteSession : public session::LiteSession { Executor *executor_ = nullptr; Model *model_ = nullptr; std::atomic is_running_ = false; -#if SUPPORT_GPU && !SUPPORT_TRAIN +#if GPU_OPENCL && !SUPPORT_TRAIN opencl::OpenCLRuntimeWrapper *opencl_runtime_wrapper_{nullptr}; +#elif GPU_VULKAN && !SUPPORT_TRAIN + gpu::GpuRuntimeWrapper *vk_runtime_wrap_{nullptr}; #endif }; } // namespace lite diff --git a/mindspore/lite/src/runtime/gpu/cuda/cuda_device.cc b/mindspore/lite/src/runtime/gpu/cuda/cuda_device.cc new file mode 100644 index 00000000000..eb6b181a2a8 --- /dev/null +++ b/mindspore/lite/src/runtime/gpu/cuda/cuda_device.cc @@ -0,0 +1,21 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "src/runtime/gpu/cuda/cuda_device.h" +#include + +namespace mindspore::lite::cuda { +CudaDevice::~CudaDevice() {} +} // namespace mindspore::lite::cuda diff --git a/mindspore/lite/src/runtime/gpu/cuda/cuda_device.h b/mindspore/lite/src/runtime/gpu/cuda/cuda_device.h new file mode 100644 index 00000000000..be7236504dc --- /dev/null +++ b/mindspore/lite/src/runtime/gpu/cuda/cuda_device.h @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef MINDSPORE_LITE_SRC_CUDA_DEVICE_H_
+#define MINDSPORE_LITE_SRC_CUDA_DEVICE_H_
+
+#include
+#include
+#include
+#include
+#include "src/runtime/gpu/gpu_runtime.h"
+
+namespace mindspore::lite::cuda {
+class CudaDevice {
+ public:
+  CudaDevice() {}
+  virtual ~CudaDevice();
+};
+}  // namespace mindspore::lite::cuda
+#endif  // MINDSPORE_LITE_SRC_CUDA_DEVICE_H_
diff --git a/mindspore/lite/src/runtime/gpu/cuda/cuda_runtime.cc b/mindspore/lite/src/runtime/gpu/cuda/cuda_runtime.cc
new file mode 100644
index 00000000000..fef2fc0e50a
--- /dev/null
+++ b/mindspore/lite/src/runtime/gpu/cuda/cuda_runtime.cc
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/gpu/cuda/cuda_runtime.h"
+#include <mutex>
+#include
+#include "include/errorcode.h"
+#include "src/common/file_utils.h"
+
+namespace mindspore::lite::cuda {
+
+static std::mutex g_mtx;
+
+bool CudaRuntime::initialized_ = false;
+uint32_t CudaRuntime::instance_count_ = 0;
+CudaRuntime *CudaRuntime::cuda_runtime_instance_ = nullptr;
+
+CudaRuntime *CudaRuntime::GetInstance() {
+  std::unique_lock<std::mutex> lck(g_mtx);
+  static CudaRuntime cuda_runtime;
+  if (instance_count_ == 0) {
+    cuda_runtime_instance_ = &cuda_runtime;
+    cuda_runtime_instance_->Init();
+  }
+  instance_count_++;
+  return cuda_runtime_instance_;
+}
+
+void CudaRuntime::DeleteInstance() {
+  std::unique_lock<std::mutex> lck(g_mtx);
+  if (instance_count_ == 0) {
+    MS_LOG(ERROR) << "No CudaRuntime instance to delete!";
+  }
+  instance_count_--;
+  if (instance_count_ == 0) {
+    cuda_runtime_instance_->Uninit();
+  }
+}
+
+CudaRuntime::CudaRuntime() {}
+
+// Init is a stub for now; it is expected to query device info and create the CUDA context.
+int CudaRuntime::Init() {
+  if (initialized_) {
+    return RET_OK;
+  }
+
+  initialized_ = true;
+  MS_LOG(INFO) << "CudaRuntime init done!";
+
+  return RET_OK;
+}
+
+int CudaRuntime::Uninit() {
+  if (!initialized_) {
+    return RET_OK;
+  }
+  initialized_ = false;
+  return RET_OK;
+}
+
+CudaRuntime::~CudaRuntime() { Uninit(); }
+
+const GpuInfo &CudaRuntime::GetGpuInfo() { return gpu_info_; }
+bool CudaRuntime::GetFp16Enable() const { return true; }
+
+}  // namespace mindspore::lite::cuda
diff --git a/mindspore/lite/src/runtime/gpu/cuda/cuda_runtime.h b/mindspore/lite/src/runtime/gpu/cuda/cuda_runtime.h
new file mode 100644
index 00000000000..28024402952
--- /dev/null
+++ b/mindspore/lite/src/runtime/gpu/cuda/cuda_runtime.h
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_CUDA_RUNTIME_H_ +#define MINDSPORE_LITE_SRC_CUDA_RUNTIME_H_ +#include +#include +#include +#include +#include +#include +#include "src/common/log_adapter.h" +#include "src/runtime/gpu/gpu_runtime.h" +#include "schema/gpu_cache_generated.h" + +using mindspore::lite::gpu::GpuInfo; +using mindspore::lite::gpu::GpuRuntime; +using mindspore::lite::gpu::GpuRuntimeWrapper; + +namespace mindspore::lite::cuda { + +class CudaRuntime : public GpuRuntime { + public: + friend GpuRuntimeWrapper; + ~CudaRuntime() override; + CudaRuntime(const CudaRuntime &) = delete; + CudaRuntime &operator=(const CudaRuntime &) = delete; + + int Init() override; + int Uninit() override; + const GpuInfo &GetGpuInfo() override; + bool GetFp16Enable() const override; + + static CudaRuntime *GetInstance(); + static void DeleteInstance(); + + private: + CudaRuntime(); + + private: + static bool initialized_; + static uint32_t instance_count_; + static CudaRuntime *cuda_runtime_instance_; +}; +} // namespace mindspore::lite::cuda +#endif // MINDSPORE_LITE_SRC_CUDA_RUNTIME_H_ diff --git a/mindspore/lite/src/runtime/gpu/gpu_runtime.cc b/mindspore/lite/src/runtime/gpu/gpu_runtime.cc new file mode 100644 index 00000000000..d86be4f2354 --- /dev/null +++ b/mindspore/lite/src/runtime/gpu/gpu_runtime.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/gpu/gpu_runtime.h" +#include +#include +#include +#include +#ifdef SHARING_MEM_WITH_OPENGL +#include +#endif +#include "include/errorcode.h" +#include "src/common/file_utils.h" + +namespace mindspore::lite::gpu { + +const GpuInfo &GpuRuntime::GetGpuInfo() { return gpu_info_; } + +} // namespace mindspore::lite::gpu diff --git a/mindspore/lite/src/runtime/gpu/gpu_runtime.h b/mindspore/lite/src/runtime/gpu/gpu_runtime.h new file mode 100644 index 00000000000..51aad9f220c --- /dev/null +++ b/mindspore/lite/src/runtime/gpu/gpu_runtime.h @@ -0,0 +1,107 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef MINDSPORE_LITE_SRC_GPU_RUNTIME_H_
+#define MINDSPORE_LITE_SRC_GPU_RUNTIME_H_
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "include/errorcode.h"
+#include "src/common/log_adapter.h"
+#include "src/runtime/allocator.h"
+#include "schema/gpu_cache_generated.h"
+
+namespace mindspore::lite::gpu {
+
+enum GpuType { OTHER = 0, ADRENO = 1, MALI = 2, MALI_T = 3, MALI_G = 4 };
+struct GpuInfo {
+  GpuType type = OTHER;
+  int model_num = 0;
+  float version = 0;
+  uint64_t global_memery_cachesize{0};
+  uint64_t global_memery_size{0};
+  uint64_t max_alloc_size{0};
+  uint32_t max_work_group_size{1};
+  uint32_t compute_units{0};
+  uint32_t max_freq{0};
+  uint32_t image_pitch_align{0};
+  std::vector<size_t> max_work_item_sizes;
+  bool support_fp16{false};
+  bool support_svm{false};
+};
+enum class GpuBackendType { OPENCL = 0, CUDA = 1, VULKAN = 2 };
+class DevKey {
+ public:
+  std::string name{""};
+};
+class GpuContext {
+ public:
+  GpuBackendType type;
+};
+class GpuDevice {
+ public:
+  GpuDevice();
+  ~GpuDevice();
+};
+class DevKernel {
+ public:
+  void *data{nullptr};
+};
+class GpuAllocator : public Allocator {};
+class GpuRuntime {
+ public:
+  GpuRuntime() {}
+  virtual ~GpuRuntime() {}
+  GpuRuntime(const GpuRuntime &) = delete;
+  GpuRuntime &operator=(const GpuRuntime &) = delete;
+
+  virtual int Init() { return RET_ERROR; }
+  virtual int Uninit() { return RET_ERROR; }
+  virtual const GpuInfo &GetGpuInfo() = 0;
+  virtual bool GetFp16Enable() const = 0;
+
+  uint64_t GetGlobalMemSize() const { return gpu_info_.global_memery_size; }
+  uint64_t GetMaxAllocSize() const { return gpu_info_.max_alloc_size; }
+  const std::vector<size_t> &GetWorkItemSize() const { return gpu_info_.max_work_item_sizes; }
+
+ protected:
+  // gpu hal native defines
+  std::unordered_map dev_kernels_;
+  GpuContext *context_{nullptr};
+  GpuDevice *device_{nullptr};
+  GpuInfo gpu_info_;
+
+ private:
+};
+template <class T>
+class GpuRuntimeWrapper {
+ public:
+  GpuRuntimeWrapper() { gpu_runtime_ = T::GetInstance(); }
+  ~GpuRuntimeWrapper() { T::DeleteInstance(); }
+  GpuRuntimeWrapper(const GpuRuntimeWrapper &) = delete;
+  GpuRuntimeWrapper &operator=(const GpuRuntimeWrapper &) = delete;
+  T *GetInstance() { return gpu_runtime_; }
+
+ private:
+  T *gpu_runtime_{nullptr};
+};
+
+}  // namespace mindspore::lite::gpu
+#endif  // MINDSPORE_LITE_SRC_GPU_RUNTIME_H_
diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
similarity index 99%
rename from mindspore/lite/src/runtime/opencl/opencl_allocator.cc
rename to mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
index 4f6e884a1b7..fe4dd8e1c38 100644
--- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
@@ -14,9 +14,9 @@
  * limitations under the License.
*/ -#include "src/runtime/opencl/opencl_allocator.h" +#include "src/runtime/gpu/opencl/opencl_allocator.h" #include -#include "src/runtime/opencl/opencl_runtime.h" +#include "src/runtime/gpu/opencl/opencl_runtime.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/common/log_adapter.h" #include "include/errorcode.h" diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.h similarity index 100% rename from mindspore/lite/src/runtime/opencl/opencl_allocator.h rename to mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.h diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc similarity index 92% rename from mindspore/lite/src/runtime/opencl/opencl_executor.cc rename to mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc index ce9b4bc187a..9399ae1a0ad 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "src/runtime/opencl/opencl_executor.h" +#include "src/runtime/gpu/opencl/opencl_executor.h" #include "src/runtime/kernel/opencl/utils.h" #include "nnacl/pack.h" #include "include/errorcode.h" @@ -27,8 +27,8 @@ int OpenCLExecutor::Run(std::vector &inputs, std::vector &ou return RunOrTune(inputs, outputs, kernels, allocator, before, after, false); } -int OpenCLExecutor::RunOrTune(std::vector &inputs, std::vector &outputs, - std::vector &kernels, Allocator *allocator, +int OpenCLExecutor::RunOrTune(const std::vector &inputs, const std::vector &outputs, + const std::vector &kernels, Allocator *allocator, const KernelCallBack &before, const KernelCallBack &after, bool is_tune) { int ret{RET_OK}; auto opencl_runtime_ins = ocl_runtime.GetInstance(); diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.h similarity index 87% rename from mindspore/lite/src/runtime/opencl/opencl_executor.h rename to mindspore/lite/src/runtime/gpu/opencl/opencl_executor.h index d76fcef69d0..484dd0fafde 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.h +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.h @@ -18,7 +18,7 @@ #define MINDSPORE_LITE_SRC_RUNTIME_OPENCL_EXECUTOR_H_ #include -#include "src/runtime/opencl/opencl_runtime.h" +#include "src/runtime/gpu/opencl/opencl_runtime.h" #include "src/runtime/allocator.h" #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "src/executor.h" @@ -34,8 +34,8 @@ class OpenCLExecutor : public Executor { int Run(std::vector &inputs, std::vector &outputs, std::vector &kernels, Allocator *allocator = nullptr, const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override; - int RunOrTune(std::vector &inputs, std::vector &outputs, - std::vector &kernels, Allocator *allocator = nullptr, + int RunOrTune(const std::vector &inputs, const std::vector &outputs, + const std::vector &kernels, Allocator *allocator = nullptr, const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr, bool is_tune = false); protected: diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc similarity index 97% rename from mindspore/lite/src/runtime/opencl/opencl_runtime.cc rename to mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc index 40da92a946b..a148a52ebe5 100644 --- 
a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "src/runtime/opencl/opencl_runtime.h" +#include "src/runtime/gpu/opencl/opencl_runtime.h" #include #include #include @@ -23,7 +23,7 @@ #endif #include "include/errorcode.h" #include "src/runtime/kernel/opencl/utils.h" -#include "src/runtime/opencl/opencl_allocator.h" +#include "src/runtime/gpu/opencl/opencl_allocator.h" #include "src/common/file_utils.h" #ifdef PROGRAM_WITH_IL #include "src/backend/opencl/cl/program.inc" @@ -72,11 +72,12 @@ void printf_callback(const char *buffer, size_t length, size_t final, void *user fwrite(buffer, 1, length, stdout); } -int OpenCLRuntime::InitGPUDevice(std::vector &platforms) { +int OpenCLRuntime::InitGPUDevice(std::vector *platforms) { + MS_ASSERT(platforms); // search GPU std::vector devices; int ret = RET_OK; - for (auto &platform : platforms) { + for (auto &platform : *platforms) { std::string platform_name; ret = platform.getInfo(CL_PLATFORM_NAME, &platform_name); if (ret != CL_SUCCESS) { @@ -173,7 +174,8 @@ int OpenCLRuntime::InitGPUDevice(std::vector &platforms) { return RET_OK; } -int OpenCLRuntime::InitQueue(std::vector &platforms) { +int OpenCLRuntime::InitQueue(std::vector *platforms) { + MS_ASSERT(platforms); cl_int ret; #if defined(SHARING_MEM_WITH_OPENGL) && (CL_HPP_TARGET_OPENCL_VERSION >= 120) // create context from glcontext @@ -195,7 +197,7 @@ int OpenCLRuntime::InitQueue(std::vector &platforms) { MS_LOG(INFO) << "Create common opencl context"; #ifdef Debug std::vector ctx_properties = {CL_CONTEXT_PLATFORM, - (cl_context_properties)platforms[0](), + (cl_context_properties)(*platforms)[0](), CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, CL_PRINTF_BUFFERSIZE_ARM, @@ -258,12 +260,12 @@ int OpenCLRuntime::Init() { MS_LOG(ERROR) << "OpenCL Platform not found!" 
<< CLErrorCode(ret); return RET_ERROR; } - auto ms_ret = InitGPUDevice(platforms); + auto ms_ret = InitGPUDevice(&platforms); if (ms_ret != RET_OK) { return ms_ret; } - ms_ret = InitQueue(platforms); + ms_ret = InitQueue(&platforms); if (ms_ret != RET_OK) { return ms_ret; } @@ -362,8 +364,9 @@ bool OpenCLRuntime::SetFp16Enable(bool enable) { return fp16_enable_ == enable; } -int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, - const std::vector &build_options_ext, TypeId data_type) { +int OpenCLRuntime::BuildKernel(const cl::Kernel &kernel, const std::string &program_name, + const std::string &kernel_name, const std::vector &build_options_ext, + TypeId data_type) { std::string build_option = default_build_option_; if (fp16_enable_ && data_type != kNumberTypeInt32) { build_option += @@ -399,7 +402,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na } cl_int ret; - kernel = cl::Kernel(program, kernel_name.c_str(), &ret); + const_cast(kernel) = cl::Kernel(program, kernel_name.c_str(), &ret); if (ret != CL_SUCCESS) { MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(ret); return RET_ERROR; diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h similarity index 91% rename from mindspore/lite/src/runtime/opencl/opencl_runtime.h rename to mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h index 48b82e9c7cb..e9981484328 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_runtime.h +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h @@ -27,8 +27,8 @@ j* you may not use this file except in compliance with the License. #include #include "dtype/type_id.h" #include "src/common/log_adapter.h" -#include "src/runtime/opencl/opencl_wrapper.h" -#include "src/runtime/opencl/opencl_allocator.h" +#include "src/runtime/gpu/opencl/opencl_wrapper.h" +#include "src/runtime/gpu/opencl/opencl_allocator.h" #include "schema/gpu_cache_generated.h" namespace mindspore::lite::opencl { @@ -76,8 +76,8 @@ class OpenCLRuntime { cl_device_svm_capabilities GetSVMCapabilities() const { return svm_enable_ ? svm_capabilities_ : 0; } template - typename std::enable_if::value, cl_int>::type SetKernelArg(cl::Kernel &kernel, uint32_t index, - const T value, + typename std::enable_if::value, cl_int>::type SetKernelArg(const cl::Kernel &kernel, + uint32_t index, const T value, const MemType mem_type = MemType::IMG) { switch (mem_type) { case MemType::BUF: { @@ -88,7 +88,7 @@ class OpenCLRuntime { } cl::Buffer *buffer = reinterpret_cast(allocator_->GetBuffer(value)); MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value; - return kernel.setArg(index, *buffer); + return const_cast(kernel).setArg(index, *buffer); } case MemType::IMG: { cl::Image2D *image = reinterpret_cast(allocator_->GetImage(value)); @@ -96,10 +96,10 @@ class OpenCLRuntime { MS_LOG(WARNING) << "Can't get Image2D, try to use Buffer. 
Please confirm the buffer type."; cl::Buffer *buffer = reinterpret_cast(allocator_->GetBuffer(value)); MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Buffer " << buffer << ", host_ptr: " << value; - return kernel.setArg(index, *buffer); + return const_cast(kernel).setArg(index, *buffer); } MS_LOG(DEBUG) << "Set kernel arg[" << index << "] OpenCL Image2D " << image << ", host_ptr: " << value; - return kernel.setArg(index, *image); + return const_cast(kernel).setArg(index, *image); } default: MS_LOG(ERROR) << "Unsupported opencl memory type: " << static_cast(mem_type); @@ -109,8 +109,8 @@ class OpenCLRuntime { template typename std::enable_if::value, cl_int>::type SetKernelArg( - cl::Kernel &kernel, uint32_t index, const T value, const MemType mem_type = MemType::IMG) { - return kernel.setArg(index, value); + const cl::Kernel &kernel, uint32_t index, const T value, const MemType mem_type = MemType::IMG) { + return const_cast(kernel).setArg(index, value); } cl::Program CreateProgramFromIL(const std::vector &binary, const std::string &flag); @@ -118,7 +118,7 @@ class OpenCLRuntime { cl::Kernel GetKernelFromBinary(const std::string &kernel_name); std::vector GetProgramBinary(const cl::Program &program); bool LoadSource(const std::string &program_name, const std::string &source); - int BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, + int BuildKernel(const cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, const std::vector &build_options_ext = {}, TypeId data_type = kNumberTypeFloat32); int RunKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local, cl::CommandQueue *command_queue = nullptr, cl::Event *event = nullptr); @@ -160,8 +160,8 @@ class OpenCLRuntime { bool LoadProgram(const std::string &program_name, cl::Program *program); bool BuildProgram(const std::string &build_options, const cl::Program &program); - int InitGPUDevice(std::vector &platforms); - int InitQueue(std::vector &platforms); + int InitGPUDevice(std::vector *platforms); + int InitQueue(std::vector *platforms); private: static InitState init_state_; diff --git a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_wrapper.cc similarity index 99% rename from mindspore/lite/src/runtime/opencl/opencl_wrapper.cc rename to mindspore/lite/src/runtime/gpu/opencl/opencl_wrapper.cc index e5ad0caf640..1f5541d5649 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_wrapper.cc @@ -16,7 +16,7 @@ #ifdef USE_OPENCL_WRAPPER -#include "src/runtime/opencl/opencl_wrapper.h" +#include "src/runtime/gpu/opencl/opencl_wrapper.h" #include #include #include diff --git a/mindspore/lite/src/runtime/opencl/opencl_wrapper.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_wrapper.h similarity index 86% rename from mindspore/lite/src/runtime/opencl/opencl_wrapper.h rename to mindspore/lite/src/runtime/gpu/opencl/opencl_wrapper.h index c15bfbd7f36..0939bb0f35f 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.h +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_wrapper.h @@ -29,7 +29,7 @@ namespace mindspore::lite::opencl { bool LoadOpenCLLibrary(void **handle_ptr); bool UnLoadOpenCLLibrary(void *handle); -// get platfrom id +// get platform id using clGetPlatformIDsFunc = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *); // get platform info using clGetPlatformInfoFunc = cl_int (*)(cl_platform_id, cl_platform_info, 
size_t, void *, size_t *); @@ -74,8 +74,7 @@ using clEnqueueMapBufferFunc = void *(*)(cl_command_queue, cl_mem, cl_bool, cl_m const cl_event *, cl_event *, cl_int *); using clEnqueueMapImageFunc = void *(*)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t *, const size_t *, size_t *, size_t *, cl_uint, const cl_event *, cl_event *, cl_int *); -using clCreateCommandQueueFunc = cl_command_queue(CL_API_CALL *)(cl_context, cl_device_id, cl_command_queue_properties, - cl_int *); +using clCreateCommandQueueFunc = cl_command_queue (*)(cl_context, cl_device_id, cl_command_queue_properties, cl_int *); using clGetCommandQueueInfoFunc = cl_int (*)(cl_command_queue, cl_command_queue_info, size_t, void *, size_t *); using clReleaseCommandQueueFunc = cl_int (*)(cl_command_queue); using clCreateProgramWithBinaryFunc = cl_program (*)(cl_context, cl_uint, const cl_device_id *, const size_t *, @@ -89,10 +88,10 @@ using clGetProgramInfoFunc = cl_int (*)(cl_program, cl_program_info, size_t, voi using clCreateKernelFunc = cl_kernel (*)(cl_program, const char *, cl_int *); using clRetainKernelFunc = cl_int (*)(cl_kernel kernel); using clCreateBufferFunc = cl_mem (*)(cl_context, cl_mem_flags, size_t, void *, cl_int *); -using clCreateImage2DFunc = cl_mem(CL_API_CALL *)(cl_context, cl_mem_flags, const cl_image_format *, size_t, size_t, - size_t, void *, cl_int *); -using clCreateImage3DFunc = cl_mem(CL_API_CALL *)(cl_context, cl_mem_flags, const cl_image_format *, size_t, size_t, - size_t, size_t, size_t, void *, cl_int *); +using clCreateImage2DFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_format *, size_t, size_t, size_t, + void *, cl_int *); +using clCreateImage3DFunc = cl_mem (*)(cl_context, cl_mem_flags, const cl_image_format *, size_t, size_t, size_t, + size_t, size_t, void *, cl_int *); using clCreateProgramWithSourceFunc = cl_program (*)(cl_context, cl_uint, const char **, const size_t *, cl_int *); using clReleaseKernelFunc = cl_int (*)(cl_kernel kernel); using clGetDeviceInfoFunc = cl_int (*)(cl_device_id, cl_device_info, size_t, void *, size_t *); @@ -105,11 +104,10 @@ using clGetEventInfoFunc = cl_int (*)(cl_event event, cl_event_info param_name, using clGetEventProfilingInfoFunc = cl_int (*)(cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); using clGetImageInfoFunc = cl_int (*)(cl_mem, cl_image_info, size_t, void *, size_t *); -using clEnqueueCopyBufferToImageFunc = cl_int(CL_API_CALL *)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t *, - const size_t *, cl_uint, const cl_event *, cl_event *); -using clEnqueueCopyImageToBufferFunc = cl_int(CL_API_CALL *)(cl_command_queue, cl_mem, cl_mem, const size_t *, - const size_t *, size_t, cl_uint, const cl_event *, - cl_event *); +using clEnqueueCopyBufferToImageFunc = cl_int (*)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t *, + const size_t *, cl_uint, const cl_event *, cl_event *); +using clEnqueueCopyImageToBufferFunc = cl_int (*)(cl_command_queue, cl_mem, cl_mem, const size_t *, const size_t *, + size_t, cl_uint, const cl_event *, cl_event *); #if CL_TARGET_OPENCL_VERSION >= 120 using clRetainDeviceFunc = cl_int (*)(cl_device_id); using clReleaseDeviceFunc = cl_int (*)(cl_device_id); @@ -127,11 +125,11 @@ using clEnqueueSVMMapFunc = cl_int (*)(cl_command_queue, cl_bool, cl_map_flags, using clEnqueueSVMUnmapFunc = cl_int (*)(cl_command_queue, void *, cl_uint, const cl_event *, cl_event *); using clSetKernelArgSVMPointerFunc = cl_int 
(*)(cl_kernel, cl_uint, const void *); // opencl 2.0 can get sub group info and wave size. -using clGetKernelSubGroupInfoKHRFunc = cl_int(CL_API_CALL *)(cl_kernel, cl_device_id, cl_kernel_sub_group_info, size_t, - const void *, size_t, void *, size_t *); -using clCreateCommandQueueWithPropertiesFunc = cl_command_queue(CL_API_CALL *)(cl_context, cl_device_id, - const cl_queue_properties *, cl_int *); -using clGetExtensionFunctionAddressFunc = void *(CL_API_CALL *)(const char *); +using clGetKernelSubGroupInfoKHRFunc = cl_int (*)(cl_kernel, cl_device_id, cl_kernel_sub_group_info, size_t, + const void *, size_t, void *, size_t *); +using clCreateCommandQueueWithPropertiesFunc = cl_command_queue (*)(cl_context, cl_device_id, + const cl_queue_properties *, cl_int *); +using clGetExtensionFunctionAddressFunc = void *(*)(const char *); #endif #define CL_DECLARE_FUNC_PTR(func) extern func##Func func diff --git a/mindspore/lite/src/runtime/kernel/cuda/CMakeLists.txt b/mindspore/lite/src/runtime/kernel/cuda/CMakeLists.txt new file mode 100644 index 00000000000..7632dea3e10 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/cuda/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB_RECURSE CUDA_KERNEL_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/*.cc + ${CMAKE_CURRENT_SOURCE_DIR}/kernel/*.cc) +add_library(cuda_kernel_mid OBJECT ${CUDA_KERNEL_SRC}) +add_dependencies(cuda_kernel_mid fbs_src) + diff --git a/mindspore/lite/src/runtime/kernel/cuda/cuda_kernel.cc b/mindspore/lite/src/runtime/kernel/cuda/cuda_kernel.cc new file mode 100644 index 00000000000..e1997009d2d --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/cuda/cuda_kernel.cc @@ -0,0 +1,22 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/cuda/cuda_kernel.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel {} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/cuda/cuda_kernel.h b/mindspore/lite/src/runtime/kernel/cuda/cuda_kernel.h new file mode 100644 index 00000000000..c887acad46b --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/cuda/cuda_kernel.h @@ -0,0 +1,33 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_CUDA_KERNEL_H_ +#define MINDSPORE_LITE_SRC_CUDA_KERNEL_H_ + +#include +#include +#include +#include +#include "src/lite_kernel.h" +#include "include/errorcode.h" +#include "src/runtime/gpu/gpu_runtime.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore::kernel {} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_CUDA_KERNEL_H_ diff --git a/mindspore/lite/src/runtime/kernel/cuda/cuda_subgraph.cc b/mindspore/lite/src/runtime/kernel/cuda/cuda_subgraph.cc new file mode 100644 index 00000000000..fd414406843 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/cuda/cuda_subgraph.cc @@ -0,0 +1,26 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/cuda/cuda_subgraph.h" +#include +#include "include/errorcode.h" +#include "src/common/utils.h" + +namespace mindspore::kernel { +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/cuda/cuda_subgraph.h b/mindspore/lite/src/runtime/kernel/cuda/cuda_subgraph.h new file mode 100644 index 00000000000..7594bbdb52e --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/cuda/cuda_subgraph.h @@ -0,0 +1,26 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUDA_KERNEL_CUDA_SUBGRAPH_KERNEL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUDA_KERNEL_CUDA_SUBGRAPH_KERNEL_H_ + +#include +#include +#include "src/sub_graph_kernel.h" + +namespace mindspore::kernel {} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUDA_KERNEL_CUDA_SUBGRAPH_KERNEL_H_ diff --git a/mindspore/lite/src/runtime/kernel/opencl/CMakeLists.txt b/mindspore/lite/src/runtime/kernel/opencl/CMakeLists.txt index ddae211135e..15308a928b5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/CMakeLists.txt +++ b/mindspore/lite/src/runtime/kernel/opencl/CMakeLists.txt @@ -1,4 +1,7 @@ -file(GLOB_RECURSE OPENCL_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/kernel/*.cc) -add_library(opencl_kernel_mid OBJECT ${OPENCL_KERNEL_SRC}) -add_dependencies(opencl_kernel_mid fbs_src) - +if(${SUPPORT_GPU} STREQUAL opencl) + file(GLOB_RECURSE OPENCL_KERNEL_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/*.cc + ${CMAKE_CURRENT_SOURCE_DIR}/kernel/*.cc) + add_library(opencl_kernel_mid OBJECT ${OPENCL_KERNEL_SRC}) + add_dependencies(opencl_kernel_mid fbs_src) +endif() diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_fusion.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_fusion.cc index 746c95106ad..7ae93c03904 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_fusion.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_fusion.cc @@ -23,7 +23,7 @@ #include "src/runtime/kernel/opencl/kernel/conv2d.h" #include "src/runtime/kernel/opencl/kernel/fusion_eltwise.h" #include "src/runtime/kernel/opencl/utils.h" -#include "src/runtime/opencl/opencl_executor.h" +#include "src/runtime/gpu/opencl/opencl_executor.h" #include "include/errorcode.h" #include "schema/ops_generated.h" #include "src/common/utils.h" diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 2ca69103098..44ff8135954 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -24,7 +24,7 @@ #include #include "src/lite_kernel.h" #include "include/errorcode.h" -#include "src/runtime/opencl/opencl_runtime.h" +#include "src/runtime/gpu/opencl/opencl_runtime.h" #include "mindspore/lite/src/dequant.h" #include "src/runtime/kernel/opencl/utils.h" diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc index e826e2379ff..16805abeb8c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc @@ -18,7 +18,7 @@ #include #include #include -#include "src/runtime/opencl/opencl_executor.h" +#include "src/runtime/gpu/opencl/opencl_executor.h" #include "src/runtime/kernel/opencl/utils.h" #include "include/errorcode.h" #include "src/common/utils.h" diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h index 6c2024a80e8..953e3e4ba91 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h @@ -20,8 +20,8 @@ #include #include #include "src/runtime/kernel/opencl/opencl_kernel.h" -#include "src/runtime/opencl/opencl_allocator.h" -#include "src/runtime/opencl/opencl_executor.h" +#include "src/runtime/gpu/opencl/opencl_allocator.h" +#include "src/runtime/gpu/opencl/opencl_executor.h" #include "src/sub_graph_kernel.h" 
namespace mindspore::kernel { diff --git a/mindspore/lite/src/runtime/opencl/CMakeLists.txt b/mindspore/lite/src/runtime/opencl/CMakeLists.txt deleted file mode 100644 index 5f5e73f8677..00000000000 --- a/mindspore/lite/src/runtime/opencl/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -set(OPENCL_RUNTIME_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/opencl_allocator.cc - ${CMAKE_CURRENT_SOURCE_DIR}/opencl_executor.cc - ${CMAKE_CURRENT_SOURCE_DIR}/opencl_allocator.h - ${CMAKE_CURRENT_SOURCE_DIR}/opencl_kernel.h - ${CMAKE_CURRENT_SOURCE_DIR}/opencl_runtime.cc - ${CMAKE_CURRENT_SOURCE_DIR}/opencl_runtime.h - ${CMAKE_CURRENT_SOURCE_DIR}/opencl_wrapper.cc - ${CMAKE_CURRENT_SOURCE_DIR}/opencl_wrapper.h - - ) diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index ff1d1a16235..af7e445f54e 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -28,9 +28,9 @@ #include "src/kernel_registry.h" #include "src/sub_graph_kernel.h" #include "src/dequant.h" -#if SUPPORT_GPU +#if GPU_OPENCL #include "src/runtime/kernel/opencl/opencl_subgraph.h" -#include "src/runtime/opencl/opencl_runtime.h" +#include "src/runtime/gpu/opencl/opencl_runtime.h" #endif #if SUPPORT_NPU #include "src/runtime/agent/npu/subgraph_npu_kernel.h" @@ -462,7 +462,7 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector input_kernels = kernel::LiteKernelUtil::SubgraphInputNodes(kernels); std::vector output_kernels = kernel::LiteKernelUtil::SubgraphOutputNodes(kernels); if (type == kernel::kGpuSubGraph) { -#if SUPPORT_GPU +#if GPU_OPENCL auto sub_kernel = new (std::nothrow) kernel::OpenCLSubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_); if (sub_kernel == nullptr) { @@ -470,6 +470,8 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector #include "src/common/log_adapter.h" #include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" #include "mindspore/lite/src/common/file_utils.h" #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" #include "mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h" diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/fill_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/fill_tests.cc index 52b430be036..4ad5ddd18b3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/fill_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/fill_tests.cc @@ -17,7 +17,6 @@ #include #include "src/common/log_adapter.h" #include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" #include "mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h" using mindspore::lite::Tensor; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc index ffbd200a845..d0852796370 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc @@ -18,7 +18,6 @@ #include "src/common/log_adapter.h" #include "common/common_test.h" #include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" #include "mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h" #include "mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h"
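
Usage note (not part of the patch): the reference-counted GpuRuntimeWrapper<T> added in src/runtime/gpu/gpu_runtime.h is the mechanism through which kernels share a backend runtime. Its constructor calls T::GetInstance(), which runs Init() for the first holder and bumps the instance count; its destructor calls T::DeleteInstance(), which runs Uninit() once the count drops back to zero. Below is a minimal sketch, assuming a hypothetical ExampleCudaKernel class, of how a CUDA kernel could hold the new CudaRuntime this way; only GpuRuntimeWrapper, CudaRuntime::GetInstance/DeleteInstance, GetFp16Enable and the RET_* codes come from this patch, everything else is illustrative.

  // Sketch only: ExampleCudaKernel, Prepare() and fp16_enable_ are hypothetical names.
  #include "include/errorcode.h"
  #include "src/runtime/gpu/cuda/cuda_runtime.h"

  namespace mindspore::kernel {
  class ExampleCudaKernel {
   public:
    // Constructing the wrapper member calls CudaRuntime::GetInstance(), which runs
    // Init() for the first holder and increments the shared instance count.
    ExampleCudaKernel() = default;
    // The wrapper destructor calls CudaRuntime::DeleteInstance(), which runs Uninit()
    // once the instance count returns to zero.
    ~ExampleCudaKernel() = default;

    int Prepare() {
      auto *runtime = runtime_wrapper_.GetInstance();
      if (runtime == nullptr) {
        return lite::RET_ERROR;
      }
      // Query backend capabilities exposed by the runtime added in this patch.
      fp16_enable_ = runtime->GetFp16Enable();
      return lite::RET_OK;
    }

   private:
    lite::gpu::GpuRuntimeWrapper<lite::cuda::CudaRuntime> runtime_wrapper_;
    bool fp16_enable_ = false;
  };
  }  // namespace mindspore::kernel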