fix fp16 bug and add gpu fp16 model to ci

parent 15244de50a
commit 2c6cfce70e
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
+#include <cstring>
 #include <string>
 #include <algorithm>
 #include <set>
 #include "src/kernel_registry.h"
@@ -69,7 +69,7 @@ int ConvolutionOpenCLKernel::Init() {
   TILES_X_ = UP_DIV(OW_, 4);
   TILES_Y_ = UP_DIV(OH_, 4);
   TILES_XY_ = TILES_X_ * TILES_Y_;
-  use_winograd_ = UseWinograd4x4To6x6();
+  use_winograd_ = UseWinograd4x4To6x6() && use_fp16_;
 
   // build kernel
   if (use_winograd_) {
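The hunk above restricts the Winograd path to fp16 (UseWinograd4x4To6x6() is now only honored when use_fp16_ is set) and relies on UP_DIV for the tile counts. UP_DIV is the usual ceiling-division helper in the MindSpore Lite sources; a minimal standalone sketch of the tiling arithmetic, with hypothetical output sizes:

    #include <cstdio>

    // Ceiling division, as defined in the MindSpore Lite sources.
    #define UP_DIV(x, y) (((x) + (y) - 1) / (y))

    int main() {
      int OW = 14, OH = 9;          // hypothetical output width/height
      int tiles_x = UP_DIV(OW, 4);  // 4: each F(4x4, 3x3) Winograd tile covers 4 output columns
      int tiles_y = UP_DIV(OH, 4);  // 3
      printf("tiles: %d x %d = %d\n", tiles_x, tiles_y, tiles_x * tiles_y);
      return 0;
    }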
@@ -134,7 +134,14 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() {
   size_t up_co_size = C4NUM * CO4 * dtype_size;
   memset(bias_data_, 0, up_co_size);
   auto ori_bias = in_tensors_.at(kBiasIndex)->MutableData();
-  memcpy(bias_data_, ori_bias, out_tensors_[0]->Channel() * dtype_size);
+  if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) {
+    float16_t *bias_ptr = static_cast<float16_t*>(bias_data_);
+    for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) {
+      bias_ptr[i] = static_cast<float16_t>(static_cast<float*>(ori_bias)[i]);
+    }
+  } else {
+    memcpy(bias_data_, ori_bias, out_tensors_[0]->Channel() * dtype_size);
+  }
   allocator->UnmapBuffer(bias_data_);
 } else {
   MS_ASSERT(in_tensors_.size() == kInputSize1);
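This is the fp16 bug from the commit title: the old code memcpy'd the fp32 bias bytes straight into a buffer that the fp16 kernel reads as float16_t, which both reinterprets every value and copies the wrong number of bytes. The fix converts element by element. A minimal standalone sketch of the difference, assuming AArch64 fp16 support (float16_t as provided by <arm_fp16.h>):

    #include <cstdio>
    #include <cstring>
    #include <vector>
    #include <arm_fp16.h>  // provides float16_t on AArch64

    int main() {
      std::vector<float> src = {0.5f, -1.25f, 3.0f};
      std::vector<float16_t> dst(src.size());

      // Wrong: copies raw fp32 bytes into the fp16 buffer; every value is
      // reinterpreted garbage, and only half of the source bytes fit at all.
      // memcpy(dst.data(), src.data(), src.size() * sizeof(float16_t));

      // Right: convert element by element, as the hunk above now does for the bias.
      for (size_t i = 0; i < src.size(); ++i) {
        dst[i] = static_cast<float16_t>(src[i]);
      }
      printf("%f\n", static_cast<float>(dst[1]));  // -1.250000
      return 0;
    }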
@@ -56,6 +56,19 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors
   }
   for (size_t i = 0; i < in_tensors.size(); ++i) {
+    if (in_tensors.at(i)->shape().size() <= 1) {
+      if (mem_type == OpenCLMemType::IMG) {
+        for (auto &iv : in_kernels[i]) {
+          auto tensors = iv->in_tensors();
+          tensors.emplace_back(in_tensors.at(i));
+          iv->set_in_tensors(tensors);
+        }
+      } else {
+        for (auto &iv : in_kernels[i]) {
+          auto tensors = iv->out_tensors();
+          tensors.emplace_back(in_tensors.at(i));
+          iv->set_out_tensors(tensors);
+        }
+      }
+      continue;
+    }
     OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i][0]);
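The new branch skips inserting a layout-conversion op for tensors of rank <= 1 (scalars and flat vectors carry no NHWC/NC4HW4 layout to convert) and instead attaches the tensor directly to the consuming kernels' input lists (IMG path) or the producing kernels' output lists. Note the copy-modify-write-back pattern around set_in_tensors; a sketch against a hypothetical mock of the kernel interface:

    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for the kernel interface used in the hunk: the
    // getter returns the tensor list by value, so the list must be copied,
    // appended to, and written back through the setter.
    struct Tensor { int id; };
    struct MockKernel {
      std::vector<Tensor *> in_tensors_;
      std::vector<Tensor *> in_tensors() const { return in_tensors_; }
      void set_in_tensors(const std::vector<Tensor *> &t) { in_tensors_ = t; }
    };

    int main() {
      Tensor scalar{42};  // a rank-<=1 tensor that needs no layout conversion
      MockKernel kernel;
      auto tensors = kernel.in_tensors();  // copy
      tensors.emplace_back(&scalar);       // append the tensor directly
      kernel.set_in_tensors(tensors);      // write back
      printf("inputs: %zu\n", kernel.in_tensors().size());
      return 0;
    }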
@@ -0,0 +1,2 @@
+mobilenet_v1_1.0_224.tflite
+mobilenet_v2_1.0_224.tflite
@@ -1,6 +1,7 @@
 mobilenet_v1_1.0_224.tflite
+mobilenet_v2_1.0_224.tflite
 resnet.tflite
 squeezenet.tflite
 mtk_AADB_HADB_MBV2_model_fp32.tflite
 hiai_cn_recognize_modify_padv2.tflite
 hiai_cv_focusShootOCRModel_08.tflite
@@ -479,6 +479,42 @@ function Run_arm64() {
         fi
     done < ${models_tflite_gpu_config}
 
+    # Run GPU fp16 converted models:
+    while read line; do
+        model_name=${line}
+        if [[ $model_name == \#* ]]; then
+            continue
+        fi
+        echo ${model_name} >> "${run_benchmark_log_file}"
+        echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1 --fp16Priority=true --accuracyThreshold=5' >> "${run_benchmark_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1 --fp16Priority=true --accuracyThreshold=5' >> adb_run_cmd.txt
+        adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_benchmark_log_file}"
+        if [ $? = 0 ]; then
+            run_result='arm64_gpu_fp16: '${model_name}' pass'
+            echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='arm64_gpu_fp16: '${model_name}' failed'
+            echo ${run_result} >> ${run_benchmark_result_file}
+            return 1
+        fi
+        # run benchmark test without calib data
+        echo ${model_name} >> "${run_benchmark_log_file}"
+        echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true --accuracyThreshold=5' >> "${run_benchmark_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true --accuracyThreshold=5' >> adb_run_cmd.txt
+        adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_benchmark_log_file}"
+        if [ $? = 0 ]; then
+            run_result='arm64_gpu_fp16: '${model_name}' pass'
+            echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='arm64_gpu_fp16: '${model_name}' failed'
+            echo ${run_result} >> ${run_benchmark_result_file}
+            return 1
+        fi
+        #sleep 1
+    done < ${models_fp16_gpu_config}
+
     # Run mindir converted models:
     while read line; do
         model_name=${line}
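The new CI loop exercises the fp16 path (--fp16Priority=true) and loosens the tolerance to --accuracyThreshold=5, so that fp16 rounding against the fp32 calibration outputs does not fail the run. As a rough illustration of what such a percentage threshold means (not the benchmark tool's actual implementation), a mean-relative-error check might look like:

    #include <cmath>
    #include <cstdio>

    // Sketch of a mean-relative-error check against calibration data with a
    // percentage threshold (here 5, matching --accuracyThreshold=5). This
    // illustrates the idea only; it is not the benchmark's real logic.
    bool CheckAccuracy(const float *out, const float *calib, int n, float threshold_percent) {
      double err_sum = 0.0;
      for (int i = 0; i < n; ++i) {
        double denom = std::fabs(calib[i]) > 1e-5 ? std::fabs(calib[i]) : 1.0;
        err_sum += std::fabs(out[i] - calib[i]) / denom;
      }
      double mean_err_percent = err_sum / n * 100.0;
      return mean_err_percent <= threshold_percent;
    }

    int main() {
      float out[] = {1.02f, 1.98f};
      float calib[] = {1.0f, 2.0f};
      printf("%s\n", CheckAccuracy(out, calib, 2, 5.0f) ? "pass" : "fail");  // pass
      return 0;
    }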
@@ -574,6 +610,7 @@ models_onnx_config=${basepath}/models_onnx.cfg
 models_fp16_config=${basepath}/models_fp16.cfg
 models_mindspore_config=${basepath}/models_mindspore.cfg
 models_tflite_gpu_config=${basepath}/models_tflite_gpu.cfg
+models_fp16_gpu_config=${basepath}/models_fp16_gpu.cfg
 
 ms_models_path=${basepath}/ms_models
 