fix fp16 bug and add gpu fp16 model to ci

wandongdong 2020-09-15 00:33:52 -07:00
parent 15244de50a
commit 2c6cfce70e
7 changed files with 63 additions and 2 deletions

View File

@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include <cstring>
+#include <string>
 #include <algorithm>
 #include <set>
 #include "src/kernel_registry.h"

View File

@@ -69,7 +69,7 @@ int ConvolutionOpenCLKernel::Init() {
   TILES_X_ = UP_DIV(OW_, 4);
   TILES_Y_ = UP_DIV(OH_, 4);
   TILES_XY_ = TILES_X_ * TILES_Y_;
-  use_winograd_ = UseWinograd4x4To6x6();
+  use_winograd_ = UseWinograd4x4To6x6() && use_fp16_;
   // build kernel
   if (use_winograd_) {
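
This change gates the Winograd 4x4-to-6x6 path on fp16: fp32 execution now always falls back to the plain convolution kernels. The TILES_* counts above are ceiling division over 4x4 output tiles, since Winograd F(4x4, 3x3) produces one 4x4 output block per tile. A minimal sketch of that arithmetic, assuming the UP_DIV macro as defined in the lite utility headers:

#include <cstdio>

// Ceiling division, as defined in the nnacl/opencl utility headers.
#define UP_DIV(x, y) (((x) + (y) - 1) / (y))

int main() {
  const int OW = 112, OH = 112;       // example output width/height
  const int tiles_x = UP_DIV(OW, 4);  // one Winograd tile covers 4 output columns
  const int tiles_y = UP_DIV(OH, 4);  // ...and 4 output rows
  std::printf("%d x %d = %d tiles\n", tiles_x, tiles_y, tiles_x * tiles_y);  // 28 x 28 = 784
  return 0;
}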

View File

@@ -134,7 +134,14 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() {
     size_t up_co_size = C4NUM * CO4 * dtype_size;
     memset(bias_data_, 0, up_co_size);
     auto ori_bias = in_tensors_.at(kBiasIndex)->MutableData();
-    memcpy(bias_data_, ori_bias, out_tensors_[0]->Channel() * dtype_size);
+    if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) {
+      float16_t *bias_ptr = static_cast<float16_t *>(bias_data_);
+      for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) {
+        bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(ori_bias)[i]);
+      }
+    } else {
+      memcpy(bias_data_, ori_bias, out_tensors_[0]->Channel() * dtype_size);
+    }
     allocator->UnmapBuffer(bias_data_);
   } else {
     MS_ASSERT(in_tensors_.size() == kInputSize1);
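
The bug this hunk fixes: when the kernel runs in fp16 but the bias tensor still arrives as fp32, the old unconditional memcpy copied raw fp32 bytes into a half-precision buffer, corrupting every bias value. The new branch narrows element by element instead. A standalone sketch of that pattern, assuming the AArch64 float16_t from <arm_neon.h>; PackBias and its parameters are illustrative, not the kernel's actual interface:

#include <arm_neon.h>
#include <cstring>

// Copy a bias vector into a kernel buffer, narrowing fp32 -> fp16 when the
// source and destination dtypes disagree. dst must hold elem_count elements
// of the destination type.
void PackBias(void *dst, const void *src, int elem_count, bool narrow_to_fp16) {
  if (narrow_to_fp16) {
    auto *out = static_cast<float16_t *>(dst);
    auto *in = static_cast<const float *>(src);
    for (int i = 0; i < elem_count; ++i) {
      out[i] = static_cast<float16_t>(in[i]);  // per-element narrowing conversion
    }
  } else {
    // dtypes already match (fp32 assumed here): a raw byte copy is safe
    std::memcpy(dst, src, elem_count * sizeof(float));
  }
}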

View File

@@ -56,6 +56,19 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
   }
   for (size_t i = 0; i < in_tensors.size(); ++i) {
     if (in_tensors.at(i)->shape().size() <= 1) {
+      if (mem_type == OpenCLMemType::IMG) {
+        for (auto &iv : in_kernels[i]) {
+          auto tensors = iv->in_tensors();
+          tensors.emplace_back(in_tensors.at(i));
+          iv->set_in_tensors(tensors);
+        }
+      } else {
+        for (auto &iv : in_kernels[i]) {
+          auto tensors = iv->out_tensors();
+          tensors.emplace_back(in_tensors.at(i));
+          iv->set_out_tensors(tensors);
+        }
+      }
       continue;
     }
     OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i][0]);
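
Rank-0/1 tensors carry no spatial layout, so inserting a to-format op for them would be wasted work; the new branch attaches the tensor directly to the consuming kernels and skips to the next input. Note the copy-extend-set round trip: auto tensors = iv->in_tensors() takes a copy of the list (auto copies even if a reference is returned), which is appended to and then written back whole, since there is no single-tensor append mutator. A reduced sketch of that pattern with placeholder types (Kernel and Tensor are stand-ins, not the lite classes):

#include <vector>

struct Tensor {};  // stand-in for lite::Tensor

class Kernel {     // stand-in for the LiteKernel interface
 public:
  std::vector<Tensor *> in_tensors() const { return in_tensors_; }
  void set_in_tensors(const std::vector<Tensor *> &tensors) { in_tensors_ = tensors; }

 private:
  std::vector<Tensor *> in_tensors_;
};

// Append a rank<=1 tensor to every consumer's input list, as the IMG branch
// above does, without routing it through a format-transform op.
void AttachToConsumers(const std::vector<Kernel *> &consumers, Tensor *small_tensor) {
  for (auto *kernel : consumers) {
    auto tensors = kernel->in_tensors();  // copy the current list
    tensors.emplace_back(small_tensor);   // extend it
    kernel->set_in_tensors(tensors);      // write it back whole
  }
}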

View File

@@ -0,0 +1,2 @@
+mobilenet_v1_1.0_224.tflite
+mobilenet_v2_1.0_224.tflite

View File

@@ -1,6 +1,7 @@
 mobilenet_v1_1.0_224.tflite
 mobilenet_v2_1.0_224.tflite
 resnet.tflite
 squeezenet.tflite
 mtk_AADB_HADB_MBV2_model_fp32.tflite
 hiai_cn_recognize_modify_padv2.tflite
+hiai_cv_focusShootOCRModel_08.tflite

View File

@@ -479,6 +479,42 @@ function Run_arm64() {
         fi
     done < ${models_tflite_gpu_config}
+    # Run GPU fp16 converted models:
+    while read line; do
+        model_name=${line}
+        if [[ $model_name == \#* ]]; then
+            continue
+        fi
+        echo ${model_name} >> "${run_benchmark_log_file}"
+        echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1 --fp16Priority=true --accuracyThreshold=5' >> "${run_benchmark_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1 --fp16Priority=true --accuracyThreshold=5' >> adb_run_cmd.txt
+        adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_benchmark_log_file}"
+        if [ $? = 0 ]; then
+            run_result='arm64_gpu_fp16: '${model_name}' pass'
+            echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='arm64_gpu_fp16: '${model_name}' failed'
+            echo ${run_result} >> ${run_benchmark_result_file}
+            return 1
+        fi
+
+        # run benchmark test without calib data
+        echo ${model_name} >> "${run_benchmark_log_file}"
+        echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true --accuracyThreshold=5' >> "${run_benchmark_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true --accuracyThreshold=5' >> adb_run_cmd.txt
+        adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_benchmark_log_file}"
+        if [ $? = 0 ]; then
+            run_result='arm64_gpu_fp16: '${model_name}' pass'
+            echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='arm64_gpu_fp16: '${model_name}' failed'
+            echo ${run_result} >> ${run_benchmark_result_file}
+            return 1
+        fi
+        #sleep 1
+    done < ${models_fp16_gpu_config}
     # Run mindir converted models:
     while read line; do
         model_name=${line}
@@ -574,6 +610,7 @@ models_onnx_config=${basepath}/models_onnx.cfg
 models_fp16_config=${basepath}/models_fp16.cfg
 models_mindspore_config=${basepath}/models_mindspore.cfg
 models_tflite_gpu_config=${basepath}/models_tflite_gpu.cfg
+models_fp16_gpu_config=${basepath}/models_fp16_gpu.cfg
 ms_models_path=${basepath}/ms_models
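
On the new CI loop's flags: --fp16Priority=true asks the GPU backend to prefer fp16 kernels, and --accuracyThreshold=5 relaxes the comparison against the fp32 calibration outputs, since fp16 accumulates more rounding error; the second loop then re-runs each model without calibration data as a pure execution check. A hedged sketch of the same preference set through the C++ API, assuming the r1.0-era lite::Context with its float16_priority field (this interface was reworked in later releases, so treat the field names as assumptions):

#include "include/context.h"
#include "include/lite_session.h"

// Build a GPU session that prefers fp16 kernels, mirroring what
// ./benchmark --device=GPU --fp16Priority=true configures internally.
mindspore::session::LiteSession *CreateGpuFp16Session() {
  mindspore::lite::Context context;
  context.device_type_ = mindspore::lite::DT_GPU;  // OpenCL backend
  context.float16_priority = true;                 // prefer fp16 kernels (assumed field name)
  // CreateSession copies the context; the caller owns the returned session.
  return mindspore::session::LiteSession::CreateSession(&context);
}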