!23957 [MS][LITE]Add Lite gitee enable
Merge pull request !23957 from gongdaguo/gitee_enable
This commit is contained in:
commit
73d75f6368
|
@ -199,29 +199,6 @@ if(MSVC)
|
|||
set(MSLITE_ENABLE_CONVERTER off)
|
||||
endif()
|
||||
|
||||
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
|
||||
NOT MSLITE_ENABLE_MINDRT
|
||||
OR NOT MSLITE_STRING_KERNEL
|
||||
OR NOT MSLITE_CONTROLFLOW_TENSORLIST
|
||||
OR NOT MSLITE_WEIGHT_DECODE
|
||||
OR NOT MSLITE_CUSTOM_KERNEL_REGISTRY))
|
||||
message(FATAL_ERROR "If one of 'MSLITE_ENABLE_MINDRT MSLITE_STRING_KERNEL "
|
||||
"MSLITE_CONTROLFLOW_TENSORLIST MSLITE_WEIGHT_DECODE MSLITE_CUSTOM_KERNEL_REGISTRY'"
|
||||
"is configured as off, MSLITE_ENABLE_CONVERTER and MSLITE_ENABLE_TESTCASES must also be configured as off")
|
||||
endif()
|
||||
|
||||
if(((MSLITE_GPU_BACKEND STREQUAL tensorrt) OR MSLITE_ENABLE_NPU) AND (
|
||||
NOT MSLITE_DELEGATE_USE))
|
||||
message(FATAL_ERROR "If MSLITE_DELEGATE_USE use is configured as off, MSLITE_ENABLE_NPU must also be configured
|
||||
as off and MSLITE_GPU_BACKEND nor can it be configured as tensorrt.")
|
||||
endif()
|
||||
|
||||
if(MSLITE_ENABLE_FP16 AND PLATFORM_ARM32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
|
||||
AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
|
||||
message(FATAL_ERROR "If you want to build fp16 in arm82_a32, \
|
||||
your Clang version:[${CMAKE_CXX_COMPILER_VERSION}] must not be less than 9.0 and please use android nkd r21e!")
|
||||
endif()
|
||||
|
||||
message(STATUS "************MindSpore Lite Build Option:************")
|
||||
message(STATUS "\tMSLITE_GPU_BACKEND = \t${MSLITE_GPU_BACKEND}")
|
||||
message(STATUS "\tMSLITE_ENABLE_NPU = \t${MSLITE_ENABLE_NPU}")
|
||||
|
@ -247,6 +224,29 @@ message(STATUS "\tMSLITE_ENABLE_FP16 = \t${MSLITE_ENABLE_FP16}")
|
|||
message(STATUS "\tMSLITE_ENABLE_MODEL_ENCRYPTION = \t${MSLITE_ENABLE_MODEL_ENCRYPTION}")
|
||||
message(STATUS "\tMSLITE_ENABLE_SPARSE_COMPUTE = \t${MSLITE_ENABLE_SPARSE_COMPUTE}")
|
||||
|
||||
# Configuration guard: abort the configure step when the converter or the test
# cases are enabled while any of the runtime features tested below is off.
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
    NOT MSLITE_ENABLE_MINDRT
    OR NOT MSLITE_STRING_KERNEL
    OR NOT MSLITE_CONTROLFLOW_TENSORLIST
    OR NOT MSLITE_WEIGHT_DECODE
    OR NOT MSLITE_CUSTOM_KERNEL_REGISTRY))
    # message() joins its string arguments without inserting a separator, so
    # every fragment except the last must end with its own trailing space.
    message(FATAL_ERROR "If one of 'MSLITE_ENABLE_MINDRT MSLITE_STRING_KERNEL "
            "MSLITE_CONTROLFLOW_TENSORLIST MSLITE_WEIGHT_DECODE MSLITE_CUSTOM_KERNEL_REGISTRY' "
            "is configured as off, MSLITE_ENABLE_CONVERTER and MSLITE_ENABLE_TESTCASES must also be configured as off")
endif()
|
||||
|
||||
# Configuration guard: abort the configure step when the tensorrt GPU backend
# or the NPU backend is requested while MSLITE_DELEGATE_USE is off.
if(((MSLITE_GPU_BACKEND STREQUAL tensorrt) OR MSLITE_ENABLE_NPU) AND (
    NOT MSLITE_DELEGATE_USE))
    # The text is split into two arguments (joined by message() with no
    # separator) so that no raw line break ends up inside the error message.
    message(FATAL_ERROR "If MSLITE_DELEGATE_USE is configured as off, MSLITE_ENABLE_NPU must also be configured "
            "as off and MSLITE_GPU_BACKEND cannot be configured as tensorrt.")
endif()
|
||||
|
||||
# Configuration guard: an FP16 build for ARM32 with Clang is rejected when the
# compiler version is below 9.0.
if(MSLITE_ENABLE_FP16 AND PLATFORM_ARM32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang"
    AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
    # Two string arguments (joined by message() with no separator) replace the
    # backslash line continuation, so the emitted text does not depend on how
    # the second source line is indented.
    message(FATAL_ERROR "If you want to build fp16 in arm82_a32, "
            "your Clang version:[${CMAKE_CXX_COMPILER_VERSION}] must not be less than 9.0 and please use android ndk r21e!")
endif()
|
||||
|
||||
if(MSLITE_ENABLE_HIGH_PERFORMANCE)
|
||||
add_compile_definitions(ENABLE_HIGH_PERFORMANCE)
|
||||
endif()
|
||||
|
|
|
@ -64,7 +64,7 @@ build_lite_x86_64_jni_and_jar() {
|
|||
rm -rf java/jni && mkdir -pv java/jni
|
||||
cd java/jni
|
||||
cmake -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
|
||||
-DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} -DSUPPORT_TRAIN=${is_train} "${LITE_JAVA_PATH}/native/"
|
||||
$CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} -DSUPPORT_TRAIN=${is_train} "${LITE_JAVA_PATH}/native/"
|
||||
make -j$THREAD_NUM
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "---------------- mindspore lite: build jni x86_64 failed----------------"
|
||||
|
@ -163,7 +163,7 @@ build_lite() {
|
|||
echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
|
||||
-DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
|
||||
-DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
|
||||
-DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
|
||||
-DPLATFORM_ARM64="on" -DENABLE_NEON="on" -DMSLITE_ENABLE_FP16="on" -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
|
||||
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
|
||||
-DENABLE_ASAN=${ENABLE_ASAN} -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
|
||||
|
@ -180,13 +180,13 @@ build_lite() {
|
|||
echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DTOOLCHAIN_NAME=${CMAKE_TOOLCHAIN_NAME} -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} \
|
||||
-DANDROID_NDK=${CMAKE_ANDROID_NDK} -DANDROID_ABI=${CMAKE_ANDROID_ABI} -DANDROID_TOOLCHAIN_NAME=${CMAKE_ANDROID_TOOLCHAIN_NAME} \
|
||||
-DANDROID_STL=${CMAKE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
|
||||
-DANDROID_STL=${CMAKE_ANDROID_STL} $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
|
||||
-DPLATFORM_ARM32="on" -DENABLE_NEON="on" -DMSLITE_ENABLE_FP16=${MSLITE_ENABLE_FP16} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
|
||||
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
|
||||
-DENABLE_ASAN=${ENABLE_ASAN} -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
|
||||
fi
|
||||
else
|
||||
cmake -DPLATFORM_X86_64=on -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
|
||||
cmake -DPLATFORM_X86_64=on $CMAKE_ARGS -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
|
||||
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
|
||||
-DENABLE_ASAN=${ENABLE_ASAN} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
|
||||
fi
|
||||
|
@ -282,7 +282,7 @@ build_lite_arm64_and_jni() {
|
|||
cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
|
||||
-DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
|
||||
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
|
||||
-DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
|
||||
-DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
|
||||
-DSUPPORT_TRAIN=${is_train} -DPLATFORM_ARM64=on "${LITE_JAVA_PATH}/native/"
|
||||
make -j$THREAD_NUM
|
||||
if [[ $? -ne 0 ]]; then
|
||||
|
@ -326,7 +326,7 @@ build_lite_arm32_and_jni() {
|
|||
cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
|
||||
-DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="armeabi-v7a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
|
||||
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
|
||||
-DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
|
||||
-DANDROID_STL=${MSLITE_ANDROID_STL} $CMAKE_ARGS -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
|
||||
-DSUPPORT_TRAIN=${is_train} -DPLATFORM_ARM32=on "${LITE_JAVA_PATH}/native"
|
||||
make -j$THREAD_NUM
|
||||
if [[ $? -ne 0 ]]; then
|
||||
|
@ -397,13 +397,19 @@ update_submodule()
|
|||
}
|
||||
|
||||
LITE_JAVA_PATH=${BASEPATH}/mindspore/lite/java
|
||||
LITE_BUILD_TYPE="Release"
|
||||
if [[ "${MSLITE_ENABLE_ACL}" == "on" ]]; then
|
||||
update_submodule
|
||||
fi
|
||||
|
||||
if [[ "${DEBUG_MODE}" == "on" ]]; then
|
||||
LITE_BUILD_TYPE="Debug"
|
||||
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug "
|
||||
else
|
||||
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release "
|
||||
fi
|
||||
if [[ "X$ENABLE_GITEE" = "Xon" ]]; then
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GITEE=ON"
|
||||
fi
|
||||
|
||||
if [[ "X$LITE_ENABLE_AAR" = "Xon" ]]; then
|
||||
build_aar
|
||||
elif [[ "X$LITE_PLATFORM" != "X" ]]; then
|
||||
|
|
|
@ -38,33 +38,31 @@ class CustomAddKernelGpu : public kernel::Kernel {
|
|||
CustomAddKernelGpu(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
|
||||
const schema::Primitive *primitive, const mindspore::Context *ctx,
|
||||
const std::string &build_options, bool fp16_enable)
|
||||
: Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {
|
||||
opencl_runtime_ = new registry::opencl::OpenCLRuntimeWrapper();
|
||||
}
|
||||
: Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {}
|
||||
~CustomAddKernelGpu() override { FreeWeight(); }
|
||||
// Prepare will be called during graph compilation
|
||||
int Prepare() override {
|
||||
const std::string kernel_name_ = "ElementAdd";
|
||||
const std::string program_name = "Arithmetic";
|
||||
std::string source = arithmetic_source;
|
||||
if (opencl_runtime_->LoadSource(program_name, source) != kSuccess) {
|
||||
if (opencl_runtime_.LoadSource(program_name, source) != kSuccess) {
|
||||
std::cerr << "Load source failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
std::vector<std::string> build_options_ext = {"-cl-mad-enable -cl-fast-relaxed-math -Werror"};
|
||||
|
||||
build_options_ext.push_back(build_options_);
|
||||
if (opencl_runtime_->BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
|
||||
if (opencl_runtime_.BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
|
||||
std::cerr << "Build kernel failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
||||
auto out_shape = custom_common::GpuTensorInfo(&outputs_[0], opencl_runtime_);
|
||||
auto out_shape = custom_common::GpuTensorInfo(&outputs_[0], &opencl_runtime_);
|
||||
local_range_ = cl::NullRange;
|
||||
global_range_ = cl::NDRange(out_shape.width, out_shape.height);
|
||||
for (int i = 0; i < inputs_.size(); ++i) {
|
||||
auto &in_tensor = inputs_.at(i);
|
||||
custom_common::GpuTensorInfo in_shape = custom_common::GpuTensorInfo(&in_tensor, opencl_runtime_);
|
||||
custom_common::GpuTensorInfo in_shape = custom_common::GpuTensorInfo(&in_tensor, &opencl_runtime_);
|
||||
if (in_tensor.IsConst()) {
|
||||
std::vector<char> weight(in_shape.Image2DSize, 0);
|
||||
bool src_is_fp16 = in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat16;
|
||||
|
@ -72,7 +70,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
|
|||
in_tensor.DataType());
|
||||
DataType dtype =
|
||||
fp16_enable_ ? mindspore::DataType::kNumberTypeFloat16 : mindspore::DataType::kNumberTypeFloat32;
|
||||
auto allocator = opencl_runtime_->GetAllocator();
|
||||
auto allocator = opencl_runtime_.GetAllocator();
|
||||
if (allocator == nullptr) {
|
||||
std::cerr << "GetAllocator fail.";
|
||||
FreeWeight();
|
||||
|
@ -86,7 +84,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
|
|||
}
|
||||
weight_ptrs_.push_back(weight_ptr);
|
||||
// Use API to write GPU memory
|
||||
if (opencl_runtime_->WriteImage(weight_ptr, weight.data()) != kSuccess) {
|
||||
if (opencl_runtime_.WriteImage(weight_ptr, weight.data()) != kSuccess) {
|
||||
std::cerr << "WriteImage fail.";
|
||||
FreeWeight();
|
||||
return lite::RET_ERROR;
|
||||
|
@ -98,7 +96,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
|
|||
|
||||
int arg_idx = 3;
|
||||
cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
|
||||
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
|
||||
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
|
||||
std::cerr << "Set kernel arg" << arg_idx << "failed.";
|
||||
FreeWeight();
|
||||
return lite::RET_ERROR;
|
||||
|
@ -118,19 +116,19 @@ class CustomAddKernelGpu : public kernel::Kernel {
|
|||
auto input_0_ptr = weight_ptrs_[0] == nullptr ? inputs_[0].MutableData() : weight_ptrs_[0];
|
||||
auto input_1_ptr = weight_ptrs_[1] == nullptr ? inputs_[1].MutableData() : weight_ptrs_[1];
|
||||
int arg_idx = 0;
|
||||
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
|
||||
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
|
||||
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
|
||||
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
|
||||
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
|
||||
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
|
||||
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (opencl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
|
||||
if (opencl_runtime_.RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
|
||||
std::cerr << "Run kernel failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
@ -192,7 +190,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
|
|||
cl::NDRange global_range_{cl::NullRange};
|
||||
cl::NDRange local_range_{cl::NullRange};
|
||||
std::vector<void *> weight_ptrs_;
|
||||
registry::opencl::OpenCLRuntimeWrapper *opencl_runtime_;
|
||||
registry::opencl::OpenCLRuntimeWrapper opencl_runtime_;
|
||||
|
||||
int PreProcess() {
|
||||
int ret = 0;
|
||||
|
@ -202,7 +200,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
|
|||
}
|
||||
for (auto i = 0; i < outputs_.size(); ++i) {
|
||||
auto *output = &outputs_.at(i);
|
||||
auto img_info = custom_common::GpuTensorInfo(output, opencl_runtime_);
|
||||
auto img_info = custom_common::GpuTensorInfo(output, &opencl_runtime_);
|
||||
auto allocator = output->allocator();
|
||||
if (allocator == nullptr) {
|
||||
std::cerr << "The output tensor of OpenCL kernel must have an allocator.";
|
||||
|
@ -219,7 +217,7 @@ class CustomAddKernelGpu : public kernel::Kernel {
|
|||
}
|
||||
|
||||
void FreeWeight() {
|
||||
auto allocator = opencl_runtime_->GetAllocator();
|
||||
auto allocator = opencl_runtime_.GetAllocator();
|
||||
if (allocator == nullptr) {
|
||||
std::cerr << "GetAllocator fail.";
|
||||
return;
|
||||
|
|
|
@ -158,33 +158,31 @@ class CustomAddKernel : public kernel::Kernel {
|
|||
CustomAddKernel(const std::vector<MSTensor> &inputs, const std::vector<MSTensor> &outputs,
|
||||
const schema::Primitive *primitive, const mindspore::Context *ctx, const std::string &build_options,
|
||||
bool fp16_enable)
|
||||
: Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {
|
||||
opencl_runtime_ = new registry::opencl::OpenCLRuntimeWrapper();
|
||||
}
|
||||
: Kernel(inputs, outputs, primitive, ctx), build_options_(build_options), fp16_enable_(fp16_enable) {}
|
||||
~CustomAddKernel() override { FreeWeight(); }
|
||||
// Prepare will be called during graph compilation
|
||||
int Prepare() override {
|
||||
const std::string kernel_name_ = "ElementAdd";
|
||||
const std::string program_name = "Arithmetic";
|
||||
std::string source = arithmetic_source;
|
||||
if (opencl_runtime_->LoadSource(program_name, source) != kSuccess) {
|
||||
if (opencl_runtime_.LoadSource(program_name, source) != kSuccess) {
|
||||
std::cerr << "Load source failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
std::vector<std::string> build_options_ext = {"-cl-mad-enable -cl-fast-relaxed-math -Werror"};
|
||||
|
||||
build_options_ext.push_back(build_options_);
|
||||
if (opencl_runtime_->BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
|
||||
if (opencl_runtime_.BuildKernel(&kernel_, program_name, kernel_name_, build_options_ext) != kSuccess) {
|
||||
std::cerr << "Build kernel failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
||||
auto out_shape = GpuTensorInfo(&outputs_[0], opencl_runtime_);
|
||||
auto out_shape = GpuTensorInfo(&outputs_[0], &opencl_runtime_);
|
||||
local_range_ = cl::NullRange;
|
||||
global_range_ = cl::NDRange(out_shape.width, out_shape.height);
|
||||
for (int i = 0; i < inputs_.size(); ++i) {
|
||||
auto &in_tensor = inputs_.at(i);
|
||||
GpuTensorInfo in_shape = GpuTensorInfo(&in_tensor, opencl_runtime_);
|
||||
GpuTensorInfo in_shape = GpuTensorInfo(&in_tensor, &opencl_runtime_);
|
||||
if (in_tensor.IsConst()) {
|
||||
std::vector<char> weight(in_shape.Image2DSize, 0);
|
||||
bool src_is_fp16 = in_tensor.DataType() == mindspore::DataType::kNumberTypeFloat16;
|
||||
|
@ -192,7 +190,7 @@ class CustomAddKernel : public kernel::Kernel {
|
|||
in_tensor.DataType());
|
||||
DataType dtype =
|
||||
fp16_enable_ ? mindspore::DataType::kNumberTypeFloat16 : mindspore::DataType::kNumberTypeFloat32;
|
||||
auto allocator = opencl_runtime_->GetAllocator();
|
||||
auto allocator = opencl_runtime_.GetAllocator();
|
||||
if (allocator == nullptr) {
|
||||
std::cerr << "GetAllocator fail.";
|
||||
FreeWeight();
|
||||
|
@ -205,7 +203,7 @@ class CustomAddKernel : public kernel::Kernel {
|
|||
return lite::RET_ERROR;
|
||||
}
|
||||
weight_ptrs_.push_back(weight_ptr);
|
||||
if (opencl_runtime_->WriteImage(weight_ptr, weight.data()) != kSuccess) {
|
||||
if (opencl_runtime_.WriteImage(weight_ptr, weight.data()) != kSuccess) {
|
||||
std::cerr << "WriteImage fail.";
|
||||
FreeWeight();
|
||||
return lite::RET_ERROR;
|
||||
|
@ -217,7 +215,7 @@ class CustomAddKernel : public kernel::Kernel {
|
|||
|
||||
int arg_idx = 3;
|
||||
cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
|
||||
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
|
||||
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx, output_shape) != kSuccess) {
|
||||
std::cerr << "Set kernel arg" << arg_idx << "failed.";
|
||||
FreeWeight();
|
||||
return lite::RET_ERROR;
|
||||
|
@ -237,19 +235,19 @@ class CustomAddKernel : public kernel::Kernel {
|
|||
auto input_0_ptr = weight_ptrs_[0] == nullptr ? inputs_[0].MutableData() : weight_ptrs_[0];
|
||||
auto input_1_ptr = weight_ptrs_[1] == nullptr ? inputs_[1].MutableData() : weight_ptrs_[1];
|
||||
int arg_idx = 0;
|
||||
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
|
||||
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_0_ptr) != kSuccess) {
|
||||
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
|
||||
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, input_1_ptr) != kSuccess) {
|
||||
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (opencl_runtime_->SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
|
||||
if (opencl_runtime_.SetKernelArg(kernel_, arg_idx++, outputs_[0].MutableData()) != kSuccess) {
|
||||
std::cerr << "Set kernel arg" << arg_idx - 1 << "failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (opencl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
|
||||
if (opencl_runtime_.RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != kSuccess) {
|
||||
std::cerr << "Run kernel failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
@ -311,7 +309,7 @@ class CustomAddKernel : public kernel::Kernel {
|
|||
cl::NDRange global_range_{cl::NullRange};
|
||||
cl::NDRange local_range_{cl::NullRange};
|
||||
std::vector<void *> weight_ptrs_;
|
||||
registry::opencl::OpenCLRuntimeWrapper *opencl_runtime_;
|
||||
registry::opencl::OpenCLRuntimeWrapper opencl_runtime_;
|
||||
|
||||
int PreProcess() {
|
||||
int ret;
|
||||
|
@ -321,7 +319,7 @@ class CustomAddKernel : public kernel::Kernel {
|
|||
}
|
||||
for (auto i = 0; i < outputs_.size(); ++i) {
|
||||
auto *output = &outputs_.at(i);
|
||||
auto img_info = GpuTensorInfo(output, opencl_runtime_);
|
||||
auto img_info = GpuTensorInfo(output, &opencl_runtime_);
|
||||
auto allocator = output->allocator();
|
||||
if (allocator == nullptr) {
|
||||
std::cerr << "The output tensor of OpenCL kernel must have an allocator.";
|
||||
|
@ -382,7 +380,7 @@ class CustomAddKernel : public kernel::Kernel {
|
|||
}
|
||||
|
||||
void FreeWeight() {
|
||||
auto allocator = opencl_runtime_->GetAllocator();
|
||||
auto allocator = opencl_runtime_.GetAllocator();
|
||||
if (allocator == nullptr) {
|
||||
std::cerr << "GetAllocator fail.";
|
||||
return;
|
||||
|
|
Loading…
Reference in New Issue