!23957 [MS][LITE] Add Lite Gitee enable

Merge pull request !23957 from gongdaguo/gitee_enable
i-robot 2021-09-23 12:27:56 +00:00 committed by Gitee
commit 73d75f6368
4 changed files with 67 additions and 65 deletions


@@ -199,29 +199,6 @@ if(MSVC)
set(MSLITE_ENABLE_CONVERTER off)
endif()
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
NOT MSLITE_ENABLE_MINDRT
OR NOT MSLITE_STRING_KERNEL
OR NOT MSLITE_CONTROLFLOW_TENSORLIST
OR NOT MSLITE_WEIGHT_DECODE
OR NOT MSLITE_CUSTOM_KERNEL_REGISTRY))
message(FATAL_ERROR "If one of 'MSLITE_ENABLE_MINDRT MSLITE_STRING_KERNEL "
"MSLITE_CONTROLFLOW_TENSORLIST MSLITE_WEIGHT_DECODE MSLITE_CUSTOM_KERNEL_REGISTRY'"
"is configured as off, MSLITE_ENABLE_CONVERTER and MSLITE_ENABLE_TESTCASES must also be configured as off")
endif()
if(((MSLITE_GPU_BACKEND STREQUAL tensorrt) OR MSLITE_ENABLE_NPU) AND (
NOT MSLITE_DELEGATE_USE))
message(FATAL_ERROR "If MSLITE_DELEGATE_USE use is configured as off, MSLITE_ENABLE_NPU must also be configured
as off and MSLITE_GPU_BACKEND nor can it be configured as tensorrt.")
endif()
if(MSLITE_ENABLE_FP16 AND PLATFORM_ARM32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
message(FATAL_ERROR "If you want to build fp16 in arm82_a32, \
your Clang version:[${CMAKE_CXX_COMPILER_VERSION}] must not be less than 9.0 and please use android nkd r21e!")
endif()
message(STATUS "************MindSpore Lite Build Option:************")
message(STATUS "\tMSLITE_GPU_BACKEND = \t${MSLITE_GPU_BACKEND}")
message(STATUS "\tMSLITE_ENABLE_NPU = \t${MSLITE_ENABLE_NPU}")
@@ -247,6 +224,29 @@ message(STATUS "\tMSLITE_ENABLE_FP16 = \t${MSLITE_ENABLE_FP16}")
message(STATUS "\tMSLITE_ENABLE_MODEL_ENCRYPTION = \t${MSLITE_ENABLE_MODEL_ENCRYPTION}")
message(STATUS "\tMSLITE_ENABLE_SPARSE_COMPUTE = \t${MSLITE_ENABLE_SPARSE_COMPUTE}")
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
NOT MSLITE_ENABLE_MINDRT
OR NOT MSLITE_STRING_KERNEL
OR NOT MSLITE_CONTROLFLOW_TENSORLIST
OR NOT MSLITE_WEIGHT_DECODE
OR NOT MSLITE_CUSTOM_KERNEL_REGISTRY))
message(FATAL_ERROR "If one of 'MSLITE_ENABLE_MINDRT MSLITE_STRING_KERNEL "
"MSLITE_CONTROLFLOW_TENSORLIST MSLITE_WEIGHT_DECODE MSLITE_CUSTOM_KERNEL_REGISTRY'"
"is configured as off, MSLITE_ENABLE_CONVERTER and MSLITE_ENABLE_TESTCASES must also be configured as off")
endif()
if(((MSLITE_GPU_BACKEND STREQUAL tensorrt) OR MSLITE_ENABLE_NPU) AND (
NOT MSLITE_DELEGATE_USE))
message(FATAL_ERROR "If MSLITE_DELEGATE_USE use is configured as off, MSLITE_ENABLE_NPU must also be configured
as off and MSLITE_GPU_BACKEND nor can it be configured as tensorrt.")
endif()
if(MSLITE_ENABLE_FP16 AND PLATFORM_ARM32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
message(FATAL_ERROR "If you want to build fp16 in arm82_a32, \
your Clang version:[${CMAKE_CXX_COMPILER_VERSION}] must not be less than 9.0 and please use android nkd r21e!")
endif()
if(MSLITE_ENABLE_HIGH_PERFORMANCE)
add_compile_definitions(ENABLE_HIGH_PERFORMANCE)
endif()
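For reference, add_compile_definitions(ENABLE_HIGH_PERFORMANCE) only exposes a preprocessor symbol to subsequently defined targets; sources opt in at compile time with #ifdef. A minimal sketch of how a kernel source might consume it (the function below is hypothetical, not part of this PR):

// Illustrative only: consuming the ENABLE_HIGH_PERFORMANCE definition.
// ElementAddExample is a hypothetical function, not MindSpore Lite API.
#include <cstddef>

void ElementAddExample(const float *a, const float *b, float *out, std::size_t n) {
#ifdef ENABLE_HIGH_PERFORMANCE
  // Fast path: trust caller contracts and skip runtime argument checks.
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = a[i] + b[i];
  }
#else
  // Default path: defensive checks before the loop.
  if (a == nullptr || b == nullptr || out == nullptr) {
    return;
  }
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = a[i] + b[i];
  }
#endif
}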


@@ -64,7 +64,7 @@ build_lite_x86_64_jni_and_jar() {
rm -rf java/jni && mkdir -pv java/jni
cd java/jni
cmake -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} -DSUPPORT_TRAIN=${is_train} "${LITE_JAVA_PATH}/native/"
$CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} -DSUPPORT_TRAIN=${is_train} "${LITE_JAVA_PATH}/native/"
make -j$THREAD_NUM
if [[ $? -ne 0 ]]; then
echo "---------------- mindspore lite: build jni x86_64 failed----------------"
@@ -163,7 +163,7 @@ build_lite() {
echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
-DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
-DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
-DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
-DPLATFORM_ARM64="on" -DENABLE_NEON="on" -DMSLITE_ENABLE_FP16="on" -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-DENABLE_ASAN=${ENABLE_ASAN} -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
@@ -180,13 +180,13 @@ build_lite() {
echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
cmake -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DTOOLCHAIN_NAME=${CMAKE_TOOLCHAIN_NAME} -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} \
-DANDROID_NDK=${CMAKE_ANDROID_NDK} -DANDROID_ABI=${CMAKE_ANDROID_ABI} -DANDROID_TOOLCHAIN_NAME=${CMAKE_ANDROID_TOOLCHAIN_NAME} \
-DANDROID_STL=${CMAKE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
-DANDROID_STL=${CMAKE_ANDROID_STL} $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
-DPLATFORM_ARM32="on" -DENABLE_NEON="on" -DMSLITE_ENABLE_FP16=${MSLITE_ENABLE_FP16} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-DENABLE_ASAN=${ENABLE_ASAN} -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
fi
else
cmake -DPLATFORM_X86_64=on -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
cmake -DPLATFORM_X86_64=on $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-DENABLE_ASAN=${ENABLE_ASAN} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
fi
@@ -282,7 +282,7 @@ build_lite_arm64_and_jni() {
cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
-DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
-DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
-DSUPPORT_TRAIN=${is_train} -DPLATFORM_ARM64=on "${LITE_JAVA_PATH}/native/"
make -j$THREAD_NUM
if [[ $? -ne 0 ]]; then
@@ -326,7 +326,7 @@ build_lite_arm32_and_jni() {
cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
-DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="armeabi-v7a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
-DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
-DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
-DSUPPORT_TRAIN=${is_train} -DPLATFORM_ARM32=on "${LITE_JAVA_PATH}/native"
make -j$THREAD_NUM
if [[ $? -ne 0 ]]; then
@@ -397,13 +397,19 @@ update_submodule()
}
LITE_JAVA_PATH=${BASEPATH}/mindspore/lite/java
LITE_BUILD_TYPE="Release"
if [[ "${MSLITE_ENABLE_ACL}" == "on" ]]; then
update_submodule
fi
if [[ "${DEBUG_MODE}" == "on" ]]; then
LITE_BUILD_TYPE="Debug"
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug "
else
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release "
fi
if [[ "X$ENABLE_GITEE" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GITEE=ON"
fi
if [[ "X$LITE_ENABLE_AAR" = "Xon" ]]; then
build_aar
elif [[ "X$LITE_PLATFORM" != "X" ]]; then
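Design note: -DCMAKE_BUILD_TYPE now reaches every cmake invocation in this script through the single CMAKE_ARGS variable, and -DENABLE_GITEE=ON is appended to the same variable, so any future global flag only needs to be added in one place instead of at each call site. The script only forwards ENABLE_GITEE; presumably it is exported by the caller or set by option parsing outside this diff.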


@@ -38,33 +38,31 @@ class CustomAddKernelGpu : public kernel::Kernel {
CustomAddKernelGpu(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
const schema::Primitive *primitive, const mindspore::Context *ctx,
const std::string &build_options, bool fp16_enable)
: Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {
opencl_runtime_ = new registry::opencl::OpenCLRuntimeWrapper();
}
: Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {}
~CustomAddKernelGpu() override { FreeWeight(); }
// Prepare will be called during graph compilation
int Prepare() override {
const std::string kernel_name_ = "ElementAdd";
const std::string program_name = "Arithmetic";
std::string source = arithmetic_source;
if (opencl_runtime_->LoadSource(program_name, source) != kSuccess) {
if (opencl_runtime_.LoadSource(program_name, source) != kSuccess) {
std::cerr << "Load source failed.";
return lite::RET_ERROR;
}
std::vector<std::string> build_options_ext = {"-cl-mad-enable -cl-fast-relaxed-math -Werror"};
build_options_ext.push_back(build_options_);
if (opencl_runtime_->BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
if (opencl_runtime_.BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
std::cerr << "Build kernel failed.";
return lite::RET_ERROR;
}
auto out_shape = custom_common::GpuTensorInfo(&outputs_[0], opencl_runtime_);
auto out_shape = custom_common::GpuTensorInfo(&outputs_[0], &opencl_runtime_);
local_range_ = cl::NullRange;
global_range_ = cl::NDRange(out_shape.width, out_shape.height);
for (int i = 0; i < inputs_.size(); ++i) {
auto &in_tensor = inputs_.at(i);
custom_common::GpuTensorInfo in_shape = custom_common::GpuTensorInfo(&in_tensor, opencl_runtime_);
custom_common::GpuTensorInfo in_shape = custom_common::GpuTensorInfo(&in_tensor, &opencl_runtime_);
if (in_tensor.IsConst()) {
std::vector<char> weight(in_shape.Image2DSize, 0);
bool src_is_fp16 = in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat16;
@@ -72,7 +70,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
in_tensor.DataType());
DataType dtype =
fp16_enable_ ? mindspore::DataType::kNumberTypeFloat16 : mindspore::DataType::kNumberTypeFloat32;
auto allocator = opencl_runtime_->GetAllocator();
auto allocator = opencl_runtime_.GetAllocator();
if (allocator == nullptr) {
std::cerr << "GetAllocator fail.";
FreeWeight();
@@ -86,7 +84,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
}
weight_ptrs_.push_back(weight_ptr);
// Use API to write GPU memory
if (opencl_runtime_->WriteImage(weight_ptr, weight.data()) != kSuccess) {
if (opencl_runtime_.WriteImage(weight_ptr, weight.data()) != kSuccess) {
std::cerr << "WriteImage fail.";
FreeWeight();
return lite::RET_ERROR;
@@ -98,7 +96,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
int arg_idx = 3;
cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
std::cerr << "Set kernel arg" << arg_idx << "failed.";
FreeWeight();
return lite::RET_ERROR;
@@ -118,19 +116,19 @@ class CustomAddKernelGpu : public kernel::Kernel {
auto input_0_ptr = weight_ptrs_[0] == nullptr ? inputs_[0].MutableData() : weight_ptrs_[0];
auto input_1_ptr = weight_ptrs_[1] == nullptr ? inputs_[1].MutableData() : weight_ptrs_[1];
int arg_idx = 0;
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
return lite::RET_ERROR;
}
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
return lite::RET_ERROR;
}
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
return lite::RET_ERROR;
}
if (opencl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
if (opencl_runtime_.RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
std::cerr << "Run kernel failed.";
return lite::RET_ERROR;
}
@@ -192,7 +190,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
cl::NDRange global_range_{cl::NullRange};
cl::NDRange local_range_{cl::NullRange};
std::vector<void *> weight_ptrs_;
registry::opencl::OpenCLRuntimeWrapper *opencl_runtime_;
registry::opencl::OpenCLRuntimeWrapper opencl_runtime_;
int PreProcess() {
int ret = 0;
@@ -202,7 +200,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
}
for (auto i = 0; i < outputs_.size(); ++i) {
auto *output = &outputs_.at(i);
auto img_info = custom_common::GpuTensorInfo(output, opencl_runtime_);
auto img_info = custom_common::GpuTensorInfo(output, &opencl_runtime_);
auto allocator = output->allocator();
if (allocator == nullptr) {
std::cerr << "The output tensor of OpenCL kernel must have an allocator.";
@@ -219,7 +217,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
}
void FreeWeight() {
auto allocator = opencl_runtime_->GetAllocator();
auto allocator = opencl_runtime_.GetAllocator();
if (allocator == nullptr) {
std::cerr << "GetAllocator fail.";
return;
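All of the -> to . changes in this file trace back to one refactor: opencl_runtime_ used to be a raw pointer allocated with new in the constructor, and the destructor shown here only calls FreeWeight(), so each kernel instance leaked its wrapper. Holding the wrapper by value ties its lifetime to the kernel. A minimal sketch of the before/after pattern (Wrapper is a stand-in type, not the actual registry::opencl::OpenCLRuntimeWrapper):

// Stand-in for registry::opencl::OpenCLRuntimeWrapper.
struct Wrapper {
  int LoadSource() { return 0; }
};

// Before: raw pointer, allocated in the constructor, never deleted.
class KernelBefore {
 public:
  KernelBefore() { runtime_ = new Wrapper(); }
  ~KernelBefore() {}  // missing delete runtime_; -> leak per instance

 private:
  Wrapper *runtime_;
};

// After: value member, constructed and destroyed with the kernel.
class KernelAfter {
 public:
  int Prepare() { return runtime_.LoadSource(); }

 private:
  Wrapper runtime_;  // no manual new/delete needed
};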


@@ -158,33 +158,31 @@ class CustomAddKernel : public kernel::Kernel {
CustomAddKernel(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
const schema::Primitive *primitive, const mindspore::Context *ctx, const std::string &build_options,
bool fp16_enable)
: Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {
opencl_runtime_ = new registry::opencl::OpenCLRuntimeWrapper();
}
: Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {}
~CustomAddKernel() override { FreeWeight(); }
// Prepare will be called during graph compilation
int Prepare() override {
const std::string kernel_name_ = "ElementAdd";
const std::string program_name = "Arithmetic";
std::string source = arithmetic_source;
if (opencl_runtime_->LoadSource(program_name, source) != kSuccess) {
if (opencl_runtime_.LoadSource(program_name, source) != kSuccess) {
std::cerr << "Load source failed.";
return lite::RET_ERROR;
}
std::vector<std::string> build_options_ext = {"-cl-mad-enable -cl-fast-relaxed-math -Werror"};
build_options_ext.push_back(build_options_);
if (opencl_runtime_->BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
if (opencl_runtime_.BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
std::cerr << "Build kernel failed.";
return lite::RET_ERROR;
}
auto out_shape = GpuTensorInfo(&outputs_[0], opencl_runtime_);
auto out_shape = GpuTensorInfo(&outputs_[0], &opencl_runtime_);
local_range_ = cl::NullRange;
global_range_ = cl::NDRange(out_shape.width, out_shape.height);
for (int i = 0; i < inputs_.size(); ++i) {
auto &in_tensor = inputs_.at(i);
GpuTensorInfo in_shape = GpuTensorInfo(&in_tensor, opencl_runtime_);
GpuTensorInfo in_shape = GpuTensorInfo(&in_tensor, &opencl_runtime_);
if (in_tensor.IsConst()) {
std::vector<char> weight(in_shape.Image2DSize, 0);
bool src_is_fp16 = in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat16;
@@ -192,7 +190,7 @@ class CustomAddKernel : public kernel::Kernel {
in_tensor.DataType());
DataType dtype =
fp16_enable_ ? mindspore::DataType::kNumberTypeFloat16 : mindspore::DataType::kNumberTypeFloat32;
auto allocator = opencl_runtime_->GetAllocator();
auto allocator = opencl_runtime_.GetAllocator();
if (allocator == nullptr) {
std::cerr << "GetAllocator fail.";
FreeWeight();
@@ -205,7 +203,7 @@ class CustomAddKernel : public kernel::Kernel {
return lite::RET_ERROR;
}
weight_ptrs_.push_back(weight_ptr);
if (opencl_runtime_->WriteImage(weight_ptr, weight.data()) != kSuccess) {
if (opencl_runtime_.WriteImage(weight_ptr, weight.data()) != kSuccess) {
std::cerr << "WriteImage fail.";
FreeWeight();
return lite::RET_ERROR;
@@ -217,7 +215,7 @@ class CustomAddKernel : public kernel::Kernel {
int arg_idx = 3;
cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
std::cerr << "Set kernel arg" << arg_idx << "failed.";
FreeWeight();
return lite::RET_ERROR;
@@ -237,19 +235,19 @@ class CustomAddKernel : public kernel::Kernel {
auto input_0_ptr = weight_ptrs_[0] == nullptr ? inputs_[0].MutableData() : weight_ptrs_[0];
auto input_1_ptr = weight_ptrs_[1] == nullptr ? inputs_[1].MutableData() : weight_ptrs_[1];
int arg_idx = 0;
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
return lite::RET_ERROR;
}
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
return lite::RET_ERROR;
}
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
return lite::RET_ERROR;
}
if (opencl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
if (opencl_runtime_.RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
std::cerr << "Run kernel failed.";
return lite::RET_ERROR;
}
@@ -311,7 +309,7 @@ class CustomAddKernel : public kernel::Kernel {
cl::NDRange global_range_{cl::NullRange};
cl::NDRange local_range_{cl::NullRange};
std::vector<void *> weight_ptrs_;
registry::opencl::OpenCLRuntimeWrapper *opencl_runtime_;
registry::opencl::OpenCLRuntimeWrapper opencl_runtime_;
int PreProcess() {
int ret;
@@ -321,7 +319,7 @@ class CustomAddKernel : public kernel::Kernel {
}
for (auto i = 0; i < outputs_.size(); ++i) {
auto *output = &outputs_.at(i);
auto img_info = GpuTensorInfo(output, opencl_runtime_);
auto img_info = GpuTensorInfo(output, &opencl_runtime_);
auto allocator = output->allocator();
if (allocator == nullptr) {
std::cerr << "The output tensor of OpenCL kernel must have an allocator.";
@@ -382,7 +380,7 @@ class CustomAddKernel : public kernel::Kernel {
}
void FreeWeight() {
auto allocator = opencl_runtime_->GetAllocator();
auto allocator = opencl_runtime_.GetAllocator();
if (allocator == nullptr) {
std::cerr << "GetAllocator fail.";
return;