!7613 [MSLITE][Develop] fix LshProjection kernel and refactor it

Merge pull request !7613 from sunsuodong/fix_lsh_projection
This commit is contained in:
mindspore-ci-bot 2020-10-22 17:26:55 +08:00 committed by Gitee
commit cfa7211c0a
4 changed files with 113 additions and 103 deletions

View File

@@ -23,13 +23,10 @@ typedef struct LshProjectionParameter {
OpParameter op_parameter_;
int lsh_type_;
int hash_shape_[2];
int in_item_num_;
size_t in_item_size_;
size_t seed_size_;
size_t key_size_;
int64_t real_dst_count;
int task_id_;
int64_t count_unit_;
int feature_num_;
char **hash_buffs_;
size_t hash_buff_size_;
int64_t thread_stride_;
} LshProjectionParameter;
#endif // MINDSPORE_LITE_NNACL_LSH_PROJECTION_PARAMETER_H_

View File

@@ -23,7 +23,6 @@
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_MEMORY_FAILED;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_LshProjection;
@@ -37,53 +36,77 @@ int LshProjectionCPUKernel::Init() {
int LshProjectionCPUKernel::ReSize() { return RET_OK; }
// Thread-pool trampoline: unpacks the kernel instance from the opaque
// callback argument and forwards the task index to DoExecute().
int LshProjectionRun(void *cdata, int task_id) {
  auto *self = reinterpret_cast<LshProjectionCPUKernel *>(cdata);
  return self->DoExecute(task_id);
}
int LshProjectionCPUKernel::Run() {
auto input_tensor0 = in_tensors_.at(0);
auto input_tensor1 = in_tensors_.at(1);
auto out_tensor0 = out_tensors_.at(0);
auto input0_tensor = in_tensors_.at(0);
auto input1_tensor = in_tensors_.at(1);
auto out_tensor = out_tensors_.at(0);
hash = reinterpret_cast<float *>(input_tensor0->MutableData());
in_data = reinterpret_cast<char *>(input_tensor1->MutableData());
weight = in_tensors_.size() == 2 ? nullptr : reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
output = reinterpret_cast<int32_t *>(out_tensor0->MutableData());
hash_seed_ = reinterpret_cast<float *>(input0_tensor->MutableData());
feature_ = reinterpret_cast<int32_t *>(input1_tensor->MutableData());
weight_ = in_tensors_.size() == 2 ? nullptr : reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
output_ = reinterpret_cast<int32_t *>(out_tensor->MutableData());
const size_t seed_size = sizeof(float);
const size_t input_item_size =
input_tensor1->ElementsNum() * sizeof(input_tensor1->data_type()) / input_tensor1->DimensionSize(0);
const size_t key_size = seed_size + input_item_size;
lsh_param_->seed_size_ = seed_size;
lsh_param_->in_item_size_ = input_item_size;
lsh_param_->key_size_ = key_size;
lsh_param_->in_item_num_ = input_tensor1->DimensionSize(0);
memcpy(lsh_param_->hash_shape_, input_tensor0->shape().data(), sizeof(int) * input_tensor0->shape().size());
elements_num_ = input_tensor0->DimensionSize(0);
count_unit_ = thread_num_ > 1 ? UP_DIV(elements_num_, thread_num_) : elements_num_;
auto ret = ParallelLaunch(this->context_->thread_pool_, LshProjectionRun, this, thread_num_);
param_->hash_buff_size_ = sizeof(float) + sizeof(int32_t);
param_->feature_num_ = input1_tensor->ElementsNum();
param_->hash_shape_[0] = input0_tensor->DimensionSize(0);
param_->hash_shape_[1] = input0_tensor->DimensionSize(1);
param_->thread_stride_ = op_parameter_->thread_num_ > 1 ? UP_DIV(param_->hash_shape_[0], op_parameter_->thread_num_)
: param_->hash_shape_[0];
auto ret = MallocKeys();
if (ret != RET_OK) {
return ret;
}
ret = ParallelLaunch(this->context_->thread_pool_, LshProjectionRun, this, op_parameter_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "LshProjection kernel parallel launch failed";
}
FreeKeys();
return ret;
}
int LshProjectionRun(void *cdata, int task_id) {
auto lsh_projection = reinterpret_cast<LshProjectionCPUKernel *>(cdata);
lsh_projection->DoExecute(task_id);
// Allocates one scratch buffer per worker thread; each buffer holds the
// (seed, feature) pair hashed by GetSignBit().
// Returns RET_OK on success, RET_ERROR on allocation failure (partial
// allocations are released before returning).
int LshProjectionCPUKernel::MallocKeys() {
  param_->hash_buffs_ = static_cast<char **>(context_->allocator->Malloc(op_parameter_->thread_num_ * sizeof(char *)));
  if (param_->hash_buffs_ == nullptr) {
    MS_LOG(ERROR) << "Memory allocation failed";
    return RET_ERROR;
  }
  // Null-initialize every slot first: if a per-thread allocation below fails,
  // FreeKeys() walks ALL thread_num_ slots, and uninitialized garbage
  // pointers must never reach allocator->Free().
  for (int i = 0; i < op_parameter_->thread_num_; i++) {
    param_->hash_buffs_[i] = nullptr;
  }
  for (int i = 0; i < op_parameter_->thread_num_; i++) {
    param_->hash_buffs_[i] = static_cast<char *>(context_->allocator->Malloc(param_->hash_buff_size_));
    if (param_->hash_buffs_[i] == nullptr) {
      FreeKeys();
      MS_LOG(ERROR) << "Memory allocation failed";
      return RET_ERROR;
    }
  }
  return RET_OK;
}
int LshProjectionCPUKernel::DoExecute(int task_id) {
int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
lsh_param_->real_dst_count = real_dst_count;
lsh_param_->task_id_ = task_id;
lsh_param_->count_unit_ = count_unit_;
if (real_dst_count <= 0) {
return lite::RET_OK;
// Releases the per-thread hash key buffers allocated by MallocKeys().
// Pointers are nulled after each Free() so the cleanup is idempotent:
// a second call (or a call after a partial MallocKeys failure) is a no-op
// rather than a double-free.
void LshProjectionCPUKernel::FreeKeys() {
  if (param_->hash_buffs_ != nullptr) {
    for (int i = 0; i < op_parameter_->thread_num_; i++) {
      context_->allocator->Free(param_->hash_buffs_[i]);
      param_->hash_buffs_[i] = nullptr;
    }
    context_->allocator->Free(param_->hash_buffs_);
    param_->hash_buffs_ = nullptr;
  }
}
switch (lsh_param_->lsh_type_) {
int LshProjectionCPUKernel::DoExecute(int task_id) {
int cur_group_num = MSMIN(param_->hash_shape_[0] - task_id * param_->thread_stride_, param_->thread_stride_);
int start = task_id * param_->thread_stride_;
int end = start + cur_group_num;
char *hash_buff = param_->hash_buffs_[task_id];
switch (param_->lsh_type_) {
case schema::LshProjectionType_SPARSE:
LshProjectionSparse(hash, in_data, weight, output, lsh_param_);
LshProjectionSparse(hash_seed_, feature_, weight_, output_, param_, start, end, hash_buff);
break;
case schema::LshProjectionType_DENSE:
LshProjectionDense(hash, in_data, weight, output, lsh_param_);
LshProjectionDense(hash_seed_, feature_, weight_, output_, param_, start, end, hash_buff);
break;
default:
return RET_ERROR;
@@ -91,50 +114,43 @@ int LshProjectionCPUKernel::DoExecute(int task_id) {
return RET_OK;
}
int LshProjectionCPUKernel::GetSignBit(char *in_data, float *weight, float seed, LshProjectionParameter *para) {
int LshProjectionCPUKernel::GetSignBit(int32_t *feature_, float *weight_, float seed, LshProjectionParameter *para,
char *hash_buff) {
double score = 0.0;
for (int i = 0; i < para->in_item_num_; i++) {
char *key = static_cast<char *>(context_->allocator->Malloc(lsh_param_->key_size_));
if (key == nullptr) {
MS_LOG(ERROR) << "malloc key failed.";
return RET_ERROR;
}
memcpy(key, &seed, para->seed_size_);
memcpy(key + para->seed_size_, in_data, para->in_item_size_);
in_data += para->in_item_size_;
int64_t hash_i = static_cast<int64_t>(mindspore::lite::StringHash64(key, para->key_size_));
for (int i = 0; i < para->feature_num_; i++) {
memcpy(hash_buff, &seed, sizeof(float));
memcpy(hash_buff + sizeof(float), &(feature_[i]), sizeof(int32_t));
int64_t hash_i = static_cast<int64_t>(lite::StringHash64(hash_buff, para->hash_buff_size_));
double hash_d = static_cast<double>(hash_i);
if (weight == nullptr) {
if (weight_ == nullptr) {
score += hash_d;
} else {
score += weight[i] * hash_d;
score += weight_[i] * hash_d;
}
context_->allocator->Free(key);
}
return (score > 0) ? 1 : 0;
}
void LshProjectionCPUKernel::LshProjectionSparse(float *hash, char *in_data, float *weight, int32_t *output,
LshProjectionParameter *para) {
int start = para->task_id_ * para->count_unit_;
int end = start + para->real_dst_count;
void LshProjectionCPUKernel::LshProjectionSparse(float *hash_seed_, int32_t *feature_, float *weight_, int32_t *output_,
LshProjectionParameter *para, int32_t start, int32_t end,
char *hash_buff) {
for (int i = start; i < end; i++) {
int32_t hash_sign = 0;
for (int j = 0; j < para->hash_shape_[1]; j++) {
int bit = GetSignBit(in_data, weight, hash[i * para->hash_shape_[1] + j], para);
int bit = GetSignBit(feature_, weight_, hash_seed_[i * para->hash_shape_[1] + j], para, hash_buff);
hash_sign = (hash_sign << 1) | bit;
}
output[i] = hash_sign + i * (1 << para->hash_shape_[1]);
output_[i] = hash_sign + i * (1 << para->hash_shape_[1]);
}
}
void LshProjectionCPUKernel::LshProjectionDense(float *hash, char *in_data, float *weight, int32_t *output,
LshProjectionParameter *para) {
int start = para->task_id_ * para->count_unit_;
int end = start + para->real_dst_count;
void LshProjectionCPUKernel::LshProjectionDense(float *hash_seed_, int32_t *feature_, float *weight_, int32_t *output_,
LshProjectionParameter *para, int32_t start, int32_t end,
char *hash_buff) {
for (int i = start; i < end; i++) {
for (int j = 0; j < para->hash_shape_[1]; j++) {
output[i * para->hash_shape_[1] + j] = GetSignBit(in_data, weight, hash[i * para->hash_shape_[1] + j], para);
output_[i * para->hash_shape_[1] + j] =
GetSignBit(feature_, weight_, hash_seed_[i * para->hash_shape_[1] + j], para, hash_buff);
}
}
}
@@ -144,16 +160,6 @@ kernel::LiteKernel *CpuLshProjectionFp32KernelCreator(const std::vector<lite::Te
OpParameter *op_parameter, const lite::InnerContext *ctx,
const kernel::KernelKey &desc,
const mindspore::lite::PrimitiveC *primitive) {
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
if (ctx == nullptr) {
MS_LOG(ERROR) << "Input context is nullptr!";
free(op_parameter);
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_LshProjection);
auto *kernel = new (std::nothrow) LshProjectionCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new LshProjectionCPUKernel fail!";
@@ -171,5 +177,4 @@ kernel::LiteKernel *CpuLshProjectionFp32KernelCreator(const std::vector<lite::Te
}
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_LshProjection, CpuLshProjectionFp32KernelCreator)
} // namespace mindspore::kernel

View File

@@ -28,8 +28,8 @@ class LshProjectionCPUKernel : public LiteKernel {
LshProjectionCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {
lsh_param_ = reinterpret_cast<LshProjectionParameter *>(op_parameter_);
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
param_ = reinterpret_cast<LshProjectionParameter *>(op_parameter_);
}
~LshProjectionCPUKernel() = default;
@@ -37,23 +37,21 @@ class LshProjectionCPUKernel : public LiteKernel {
int ReSize() override;
int Run() override;
int DoExecute(int task_id);
int GetSignBit(char *in_data, float *weight, float seed, LshProjectionParameter *para);
void LshProjectionSparse(float *hash, char *in_data, float *weight, int32_t *output, LshProjectionParameter *param);
void LshProjectionDense(float *hash, char *in_data, float *weight, int32_t *output, LshProjectionParameter *param);
private:
LshProjectionParameter *lsh_param_ = nullptr;
int thread_num_;
int64_t elements_num_;
int64_t count_unit_;
float *hash = nullptr;
char *in_data = nullptr;
float *weight = nullptr;
int32_t *output = nullptr;
int MallocKeys();
void FreeKeys();
int GetSignBit(int32_t *feature_, float *weight_, float seed, LshProjectionParameter *para, char *hash_buff);
void LshProjectionSparse(float *hash_seed_, int32_t *feature_, float *weight_, int32_t *output_,
LshProjectionParameter *param, int32_t start, int32_t end, char *hash_buff);
void LshProjectionDense(float *hash_seed_, int32_t *feature_, float *weight_, int32_t *output_,
LshProjectionParameter *param, int32_t start, int32_t end, char *hash_buff);
LshProjectionParameter *param_ = nullptr;
float *hash_seed_ = nullptr;
int32_t *feature_ = nullptr;
float *weight_ = nullptr;
int32_t *output_ = nullptr;
};
int LshProjectionRun(void *cdata, int task_id);
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_LSH_PROJECTION_H_

View File

@@ -98,17 +98,27 @@ int Benchmark::ReadInputFile() {
MS_LOG(ERROR) << "ReadFile return nullptr";
return RET_ERROR;
}
auto tensor_data_size = cur_tensor->Size();
if (size != tensor_data_size) {
std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
<< std::endl;
MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
delete bin_buf;
return RET_ERROR;
if (cur_tensor->data_type() == kObjectTypeString) {
std::string str(bin_buf, size);
auto ret = StringsToMSTensor({str}, cur_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << "write strings to tensor failed";
delete[] bin_buf;
return RET_ERROR;
}
} else {
auto tensor_data_size = cur_tensor->Size();
if (size != tensor_data_size) {
std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
<< std::endl;
MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
delete[] bin_buf;
return RET_ERROR;
}
auto input_data = cur_tensor->MutableData();
memcpy(input_data, bin_buf, tensor_data_size);
}
auto input_data = cur_tensor->MutableData();
memcpy(input_data, bin_buf, tensor_data_size);
delete[](bin_buf);
delete[] bin_buf;
}
}
return RET_OK;