!4401 add pad int8 multi thread

Merge pull request !4401 from zhaozhenlong/lite/op/pad_int8_threads

Commit d3ef9f77c5
@@ -111,18 +111,35 @@ int PadInt8CPUKernel::Init() {
   return RET_OK;
 }
 
+int PadInt8CPUKernel::RunImpl(int task_id) {
+  return PadConstant4D(in_data_, out_data_, in_dims_, out_dims_, pad_param_->paddings_, task_id, context_->thread_num_);
+}
+
+int PadInt8Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto resize = reinterpret_cast<PadInt8CPUKernel *>(cdata);
+  auto error_code = resize->RunImpl(task_id);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "Resize Run error task_id[" << task_id << "] error_code[" << error_code << "]";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
 int PadInt8CPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Prepare failed.";
     return RET_ERROR;
   }
-  int8_t *in_data = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
-  int8_t *out_data = reinterpret_cast<int8_t *>(out_tensors_[0]->Data());
+  in_data_ = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
+  out_data_ = reinterpret_cast<int8_t *>(out_tensors_[0]->Data());
 
-  memset(out_data, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t));
-  PadConstant4D(in_data, out_data, in_dims_, out_dims_, pad_param_->paddings_);
+  memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t));
+  int error_code = LiteBackendParallelLaunch(PadInt8Impl, this, context_->thread_num_);
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
 }  // namespace mindspore::kernel
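Note on the threading model: Run() caches the tensor pointers in in_data_ / out_data_ (RunImpl only receives a task_id), memsets the whole output to the pad constant once, and then LiteBackendParallelLaunch invokes PadInt8Impl for every task_id in [0, context_->thread_num_). Each task copies a disjoint, strided subset of input rows, so the tasks need no synchronization. A minimal standalone sketch of that pattern, using std::thread purely for illustration (PadRowsTask and the shapes below are made up; the real per-task work is PadConstant4D and the real dispatch goes through the lite thread pool):

// Illustration only -- emulates the task-splitting dispatch with std::thread.
#include <cstdint>
#include <cstring>
#include <thread>
#include <vector>

// Hypothetical stand-in for the per-task body of PadConstant4D:
// task `task_id` copies rows task_id, task_id + thread_num, ... into the
// already pad-filled output, so writes from different tasks never overlap.
void PadRowsTask(const int8_t *in, int8_t *out, int rows, int row_len,
                 int pad_top, int pad_left, int out_row_len,
                 int task_id, int thread_num) {
  for (int h = task_id; h < rows; h += thread_num) {
    std::memcpy(out + (h + pad_top) * out_row_len + pad_left,
                in + h * row_len, row_len);
  }
}

int main() {
  const int rows = 4, row_len = 3, pad_top = 1, pad_left = 1;
  const int out_row_len = row_len + 2;                   // one pad column per side
  std::vector<int8_t> in(rows * row_len, 7);             // interior values
  std::vector<int8_t> out((rows + 2) * out_row_len, 0);  // pre-filled pad constant (the memset in Run())

  const int thread_num = 2;
  std::vector<std::thread> workers;
  for (int task_id = 0; task_id < thread_num; ++task_id) {
    workers.emplace_back(PadRowsTask, in.data(), out.data(), rows, row_len,
                         pad_top, pad_left, out_row_len, task_id, thread_num);
  }
  for (auto &t : workers) t.join();
  return 0;
}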
@@ -38,6 +38,7 @@ class PadInt8CPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int RunImpl(int task_id);
 
  private:
   int SetQuantParam();
@@ -46,6 +47,8 @@ class PadInt8CPUKernel : public LiteKernel {
 
  private:
   PadParameter *pad_param_;
+  int8_t *in_data_;
+  int8_t *out_data_;
   int in_dims_[DEFAULT_PAD_NDIMS];
   int out_dims_[DEFAULT_PAD_NDIMS];
 };
@@ -16,12 +16,13 @@
 
 #include "nnacl/int8/pad.h"
 #include "nnacl/common_func.h"
+#include "nnacl/errorcode.h"
 
-void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
-                   const int32_t *paddings) {
+int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
+                  const int32_t *paddings, const int tid, const int thread_num) {
   int32_t copy_size = in_dims[3];
   for (int n = 0; n < in_dims[0]; n++) {
-    for (int h = 0; h < in_dims[1]; h++) {
+    for (int h = tid; h < in_dims[1]; h += thread_num) {
       for (int w = 0; w < in_dims[2]; w++) {
         const int8_t *in = in_data + offset(in_dims, n, h, w, 0);
         int8_t *out = out_data + offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]);
@@ -29,5 +30,5 @@ void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_di
       }
     }
   }
-  return;
+  return NNACL_OK;
 }
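The functional change in PadConstant4D is only the h loop: thread tid now handles rows tid, tid + thread_num, tid + 2 * thread_num, and so on. With in_dims[1] = 5 and thread_num = 2, for instance, tid 0 copies rows 0, 2, 4 and tid 1 copies rows 1, 3, so every row is copied exactly once and no two threads touch the same row. A tiny sketch that prints this partition (illustration only):

#include <cstdio>

int main() {
  const int rows = 5, thread_num = 2;
  for (int tid = 0; tid < thread_num; ++tid) {
    std::printf("tid %d:", tid);
    for (int h = tid; h < rows; h += thread_num) {
      std::printf(" %d", h);  // rows handled by this task
    }
    std::printf("\n");        // prints "tid 0: 0 2 4" and "tid 1: 1 3"
  }
  return 0;
}

Returning an int (NNACL_OK) instead of void lets the per-task wrapper PadInt8Impl propagate a status code back through the parallel launch.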
@@ -24,8 +24,8 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
-                   const int32_t *paddings);
+int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dims, const int32_t *out_dims,
+                  const int32_t *paddings, const int tid, const int thread_num);
 #ifdef __cplusplus
 }
 #endif
@@ -183,6 +183,7 @@ TEST_F(TestPadInt8, PadInt8TestInit4) {
   std::vector<lite::tensor::Tensor *> outputs_;
   auto pad_param = new PadParameter();
   lite::Context *ctx = new lite::Context;
+  ctx->thread_num_ = 2;
   int8_t *correct;
   int total_size = PadInt8TestInit2(&inputs_, &outputs_, pad_param, &correct);
   kernel::PadInt8CPUKernel *pad =