forked from mindspore-Ecosystem/mindspore
!9042 support opencl program binary cache
From: @ddwsky Reviewed-by: @HilbertDavid,@zhanghaibo5 Signed-off-by: @HilbertDavid
This commit is contained in:
commit
83c1540c36
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
namespace mindspore.schema;
|
||||
|
||||
// Tuning parameters stored alongside one cached kernel binary (see KernelBin.tune).
// All four fields are vectors of 32-bit ints.
// NOTE(review): the field names suggest local work size / block size / tensor shape /
// operator parameters, but this schema does not define their meaning -- confirm with the writer.
table TuneParam {
|
||||
local: [int];
|
||||
block: [int];
|
||||
shape: [int];
|
||||
opPara: [int];
|
||||
}
|
||||
|
||||
// One cache entry: a compiled program binary, the name it is stored under,
// and its tuning parameters.
table KernelBin {
|
||||
// Name identifying the entry.
name: string;
|
||||
tune: TuneParam;
|
||||
// Raw bytes of the program binary.
data: [ubyte];
|
||||
}
|
||||
|
||||
// Root table of the GPU cache file (declared as root_type below):
// a cache name, a version string, and the list of all cached kernel binaries.
table GpuCache {
|
||||
name: string;
|
||||
version: string;
|
||||
allBins: [KernelBin];
|
||||
}
|
||||
|
||||
root_type GpuCache;
|
|
@ -17,6 +17,7 @@ if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
|||
endif ()
|
||||
|
||||
set(LITE_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/file_utils.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/utils.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/graph_util.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/log_adapter.cc
|
||||
|
|
|
@ -67,7 +67,7 @@ void *OpenCLAllocator::MinimumFit(size_t size, const std::vector<size_t> &img_si
|
|||
void *OpenCLAllocator::CreateBuffer(size_t size, void *data, size_t flags, cl::Buffer **buffer) {
|
||||
cl_int ret = CL_SUCCESS;
|
||||
MS_ASSERT(buffer);
|
||||
*buffer = new (std::nothrow) cl::Buffer(*ocl_runtime_->Context(), flags, size, data, &ret);
|
||||
*buffer = new (std::nothrow) cl::Buffer(*ocl_runtime_->Context(), static_cast<cl_mem_flags>(flags), size, data, &ret);
|
||||
if (*buffer == nullptr) {
|
||||
MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")";
|
||||
return nullptr;
|
||||
|
@ -90,6 +90,9 @@ void *OpenCLAllocator::CreateBuffer(size_t size, void *data, size_t flags, cl::B
|
|||
void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector<size_t> &img_size, void *data, size_t flags,
|
||||
bool is_map, cl::Buffer **buffer, cl::Image2D **image) {
|
||||
cl_int ret = CL_SUCCESS;
|
||||
MS_ASSERT(buffer);
|
||||
MS_ASSERT(image);
|
||||
MS_ASSERT(img_size.size() == 3);
|
||||
cl::ImageFormat image_format(CL_RGBA, img_size[2]);
|
||||
if (data == nullptr) {
|
||||
*image = new (std::nothrow)
|
||||
|
@ -332,7 +335,7 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
|
|||
}
|
||||
MemBuf *mem_buf = it->second;
|
||||
MS_ASSERT(mem_buf);
|
||||
void *new_host_ptr;
|
||||
void *new_host_ptr{nullptr};
|
||||
if (mem_buf->img_size.empty()) {
|
||||
cl::Buffer *buffer = static_cast<cl::Buffer *>(mem_buf->device_ptr_);
|
||||
MS_ASSERT(buffer);
|
||||
|
|
|
@ -17,12 +17,14 @@
|
|||
#include "src/runtime/opencl/opencl_runtime.h"
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
#ifdef SHARING_MEM_WITH_OPENGL
|
||||
#include <EGL/egl.h>
|
||||
#endif
|
||||
#include "include/errorcode.h"
|
||||
#include "src/runtime/kernel/opencl/utils.h"
|
||||
#include "src/runtime/opencl/opencl_allocator.h"
|
||||
#include "src/common/file_utils.h"
|
||||
#ifdef PROGRAM_WITH_IL
|
||||
#include "src/backend/opencl/cl/program.inc"
|
||||
#endif
|
||||
|
@ -254,6 +256,9 @@ int OpenCLRuntime::Init() {
|
|||
std::string flag = "";
|
||||
binary_program_ = CreateProgramFromIL(g_program_binary, flag);
|
||||
#endif
|
||||
if (enable_cache_) {
|
||||
InitGpuCache();
|
||||
}
|
||||
init_done_ = true;
|
||||
MS_LOG(INFO) << "OpenCLRuntime init done!";
|
||||
|
||||
|
@ -261,6 +266,10 @@ int OpenCLRuntime::Init() {
|
|||
}
|
||||
|
||||
int OpenCLRuntime::Uninit() {
|
||||
if (enable_cache_) {
|
||||
StoreCache();
|
||||
}
|
||||
binary_map_.clear();
|
||||
program_map_.clear();
|
||||
delete allocator_;
|
||||
delete default_command_queue_;
|
||||
|
@ -374,6 +383,12 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
|
|||
MS_LOG(ERROR) << program_name << " build failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (enable_cache_) {
|
||||
need_write_ = true;
|
||||
auto bin = GetProgramBinaries(program);
|
||||
MS_ASSERT(bin.size() >= 1);
|
||||
binary_map_.emplace(build_program_key, bin[0]);
|
||||
}
|
||||
program_map_.emplace(build_program_key, program);
|
||||
}
|
||||
|
||||
|
@ -673,9 +688,8 @@ cl::Program OpenCLRuntime::CreateProgramFromIL(const std::vector<char> &binary,
|
|||
}
|
||||
|
||||
// build program with binary
|
||||
cl::Program OpenCLRuntime::CreateProgramFromBinary(const std::vector<std::vector<unsigned char>> &binary,
|
||||
const std::string &flag) {
|
||||
cl::Program program = cl::Program(*context_, {*device_}, binary);
|
||||
cl::Program OpenCLRuntime::CreateProgramFromBinary(const std::vector<unsigned char> &binary, const std::string &flag) {
|
||||
cl::Program program = cl::Program(*context_, {*device_}, {binary});
|
||||
bool status = BuildProgram(default_build_opts_, program);
|
||||
if (!status) {
|
||||
MS_LOG(ERROR) << "Build program with binary failed!";
|
||||
|
@ -691,4 +705,75 @@ std::vector<std::vector<unsigned char>> OpenCLRuntime::GetProgramBinaries(const
|
|||
}
|
||||
return binary;
|
||||
}
|
||||
void OpenCLRuntime::InitGpuCache() {
|
||||
size_t len;
|
||||
char *buf = lite::ReadFile(cache_path_.c_str(), &len);
|
||||
if (LoadCache(buf) != RET_OK) {
|
||||
MS_LOG(ERROR) << "Load opencl cache fail";
|
||||
}
|
||||
delete buf;
|
||||
MS_LOG(INFO) << "Init opencl cache success";
|
||||
}
|
||||
int OpenCLRuntime::LoadCache(const void *buf) {
|
||||
if (buf == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto gpu_cache = schema::GetGpuCache(buf);
|
||||
if (gpu_cache == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto *bins = gpu_cache->allBins();
|
||||
if (bins == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto n = bins->size();
|
||||
for (auto i = 0; i < n; ++i) {
|
||||
auto *kernel_bin = bins->template GetAs<schema::KernelBin>(i);
|
||||
if (kernel_bin == nullptr) {
|
||||
MS_LOG(ERROR) << "kernel_bin[" << i << "] null";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto *pdata = kernel_bin->data();
|
||||
MS_ASSERT(pdata);
|
||||
if (pdata->size() == 0) {
|
||||
continue;
|
||||
}
|
||||
std::vector<unsigned char> bin(pdata->begin(), pdata->end());
|
||||
auto program = CreateProgramFromBinary(bin, kernel_bin->name()->str());
|
||||
program_map_.emplace(kernel_bin->name()->str(), program);
|
||||
binary_map_.emplace(kernel_bin->name()->str(), bin);
|
||||
MS_LOG(INFO) << "LoadCache " << kernel_bin->name()->str() << " success, size=" << pdata->size();
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
void OpenCLRuntime::StoreCache() {
|
||||
if (need_write_) {
|
||||
auto fbb_ = new (std::nothrow) flatbuffers::FlatBufferBuilder;
|
||||
if (fbb_ == nullptr) {
|
||||
MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail";
|
||||
return;
|
||||
}
|
||||
std::vector<flatbuffers::Offset<schema::KernelBin>> vec_kernel_bin;
|
||||
for (auto iv : binary_map_) {
|
||||
auto name = fbb_->CreateString(iv.first);
|
||||
auto data = fbb_->CreateVector<uint8_t>(iv.second);
|
||||
std::vector<int32_t> shape;
|
||||
auto tune = schema::CreateTuneParam(*fbb_, fbb_->CreateVector<int32_t>(shape), fbb_->CreateVector<int32_t>(shape),
|
||||
fbb_->CreateVector<int32_t>(shape), fbb_->CreateVector<int32_t>(shape));
|
||||
auto kbin = schema::CreateKernelBin(*fbb_, name, tune, data);
|
||||
vec_kernel_bin.emplace_back(kbin);
|
||||
MS_LOG(INFO) << "StoreCache " << iv.first << " success, size=" << iv.second.size();
|
||||
}
|
||||
|
||||
auto data = fbb_->CreateVector<flatbuffers::Offset<schema::KernelBin>>(vec_kernel_bin);
|
||||
auto name = fbb_->CreateString("OpenCLCache");
|
||||
auto version = fbb_->CreateString(version_);
|
||||
auto gpu_cache = schema::CreateGpuCache(*fbb_, name, version, data);
|
||||
fbb_->Finish(gpu_cache);
|
||||
uint8_t *buf = fbb_->GetBufferPointer();
|
||||
lite::WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb_->GetSize());
|
||||
MS_LOG(INFO) << "store opencl cache ok, size=" << fbb_->GetSize();
|
||||
delete fbb_;
|
||||
}
|
||||
}
|
||||
} // namespace mindspore::lite::opencl
|
||||
|
|
|
@ -27,6 +27,7 @@ j* you may not use this file except in compliance with the License.
|
|||
#include "src/common/log_adapter.h"
|
||||
#include "src/runtime/opencl/opencl_wrapper.h"
|
||||
#include "src/runtime/opencl/opencl_allocator.h"
|
||||
#include "schema/gpu_cache_generated.h"
|
||||
|
||||
namespace mindspore::lite::opencl {
|
||||
|
||||
|
@ -107,7 +108,7 @@ class OpenCLRuntime {
|
|||
}
|
||||
|
||||
cl::Program CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag);
|
||||
cl::Program CreateProgramFromBinary(const std::vector<std::vector<unsigned char>> &binary, const std::string &flag);
|
||||
cl::Program CreateProgramFromBinary(const std::vector<unsigned char> &binary, const std::string &flag);
|
||||
cl::Kernel GetKernelFromBinary(const std::string &kernel_name);
|
||||
std::vector<std::vector<unsigned char>> GetProgramBinaries(const cl::Program &program);
|
||||
bool LoadSource(const std::string &program_name, const std::string &source);
|
||||
|
@ -139,6 +140,10 @@ class OpenCLRuntime {
|
|||
*/
|
||||
int GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id);
|
||||
|
||||
void InitGpuCache();
|
||||
int LoadCache(const void *buf);
|
||||
void StoreCache();
|
||||
|
||||
private:
|
||||
static OpenCLRuntime *GetInstance();
|
||||
static void DeleteInstance();
|
||||
|
@ -171,6 +176,11 @@ class OpenCLRuntime {
|
|||
cl_uint image_pitch_align_{0};
|
||||
std::vector<size_t> max_work_item_sizes_;
|
||||
void *handle_{nullptr};
|
||||
std::map<std::string, std::vector<unsigned char>> binary_map_;
|
||||
std::string cache_path_{"/data/local/tmp/opencl_cache"};
|
||||
const std::string version_{"V0.1"};
|
||||
bool need_write_{false};
|
||||
bool enable_cache_{false};
|
||||
};
|
||||
|
||||
class OpenCLRuntimeWrapper {
|
||||
|
|
Loading…
Reference in New Issue