!1867 Add unique process for duplicated indices in cpu kernel

Merge pull request !1867 from YuJianfeng/master
mindspore-ci-bot 2020-06-09 14:11:26 +08:00 committed by Gitee
commit f1199f2100
4 changed files with 153 additions and 10 deletions
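In short: when the incoming sparse gradient repeats an index, the change first sums the gradient rows that share that index into a single unique row, and only then runs the FTRL update, so each row of var is touched once per launch with the accumulated gradient. Below is a minimal standalone sketch of that row-summing idea; SumRowsByIndex is a hypothetical illustration, not the kernel code, which does the same thing into preallocated flat buffers via the new DeduplicateIndexedSlices helper shown in the diff.

#include <cstddef>
#include <unordered_map>
#include <vector>

// Hypothetical illustration (not the kernel code): sum the rows of a row-major
// (indices.size() x outer_dim) gradient that share the same index.
std::unordered_map<int, std::vector<float>> SumRowsByIndex(const std::vector<int> &indices,
                                                           const std::vector<float> &grad, size_t outer_dim) {
  std::unordered_map<int, std::vector<float>> summed;
  for (size_t i = 0; i < indices.size(); ++i) {
    std::vector<float> &row = summed[indices[i]];
    row.resize(outer_dim, 0.0f);  // no-op after the first row for this index
    for (size_t j = 0; j < outer_dim; ++j) {
      row[j] += grad[i * outer_dim + j];
    }
  }
  return summed;
}
// Example: indices {0, 0, 1} with rows {0, 1}, {2, 3}, {4, 5} yields
// summed[0] == {2, 4} and summed[1] == {4, 5}.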

View File

@@ -547,5 +547,38 @@ int Sign(float x) {
   }
   return 0;
 }
+
+void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
+                              size_t outer_dim) {
+  MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
+  MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
+  MS_EXCEPTION_IF_NULL(unique_grad);
+  MS_EXCEPTION_IF_NULL(unique_grad->value_);
+  MS_EXCEPTION_IF_NULL(unique_grad->indices_);
+  std::unordered_map<int, size_t> index_map;
+  size_t unique_indices_size = 0;
+  for (size_t i = 0; i < origin_sparse_grad.indices_size_; ++i) {
+    int index = origin_sparse_grad.indices_[i];
+    if (index < 0 || (size_t)index >= first_dim) {
+      continue;
+    }
+    auto iter = index_map.find(index);
+    if (iter == index_map.end()) {
+      index_map[index] = unique_indices_size;
+      unique_grad->indices_[unique_indices_size] = index;
+      for (size_t j = unique_indices_size * outer_dim, k = i * outer_dim; j < (unique_indices_size + 1) * outer_dim;
+           ++j, ++k) {
+        unique_grad->value_[j] = origin_sparse_grad.value_[k];
+      }
+      unique_indices_size++;
+    } else {
+      size_t first_index = iter->second;
+      for (size_t j = first_index * outer_dim, k = i * outer_dim; j < (first_index + 1) * outer_dim; ++j, ++k) {
+        unique_grad->value_[j] += origin_sparse_grad.value_[k];
+      }
+    }
+  }
+  unique_grad->indices_size_ = unique_indices_size;
+}
 }  // namespace kernel
 }  // namespace mindspore

View File

@@ -69,6 +69,12 @@ class KernelMeta {
   std::unordered_map<std::string, std::string> kernel_meta_map_;
 };
 
+struct SparseGradient {
+  float *value_;
+  int *indices_;
+  size_t indices_size_;
+};
+
 bool CheckCache(const std::string &kernel_name);
 KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
 KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
@@ -84,6 +90,8 @@ void SaveJsonInfo(const std::string &json_name, const std::string &info);
 std::string GetProcessor(const AnfNodePtr &anf_node);
 bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
 int Sign(float x);
+void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
+                              size_t outer_dim);
 }  // namespace kernel
 }  // namespace mindspore

View File

@@ -84,28 +84,35 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
   auto grad = reinterpret_cast<float *>(inputs[3]->addr);
   auto indices = reinterpret_cast<int *>(inputs[4]->addr);
-  for (size_t i = 0; i < indices_size_; ++i) {
-    int index = indices[i];
-    if ((size_t)index >= var_first_dim_size_) {
-      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range";
+  std::vector<float> new_grad(indices_size_ * var_outer_dim_size_);
+  std::vector<int> new_indices(indices_size_);
+  SparseGradient unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size_});
+  DeduplicateIndexedSlices(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_,
+                           var_outer_dim_size_);
+  for (size_t i = 0; i < unique_sparse_grad.indices_size_; ++i) {
+    int index = unique_sparse_grad.indices_[i];
+    if (index < 0 || (size_t)index >= var_first_dim_size_) {
+      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process";
     }
     for (size_t j = var_outer_dim_size_ * index, k = var_outer_dim_size_ * i; j < var_outer_dim_size_ * (index + 1);
          ++j, ++k) {
-      auto accum_new = accum[j] + grad[k] * grad[k];
+      auto summed_grad = unique_sparse_grad.value_[k];
+      auto accum_new = accum[j] + summed_grad * summed_grad;
       if (lr_power_ == -0.5) {
-        linear[j] += grad[k] - (sqrt(accum_new) - sqrt(accum[j])) / lr_ * var[j];
+        linear[j] += summed_grad - (std::sqrt(accum_new) - std::sqrt(accum[j])) / lr_ * var[j];
       } else {
-        linear[j] += grad[k] - (pow(accum_new, -lr_power_) - pow(accum[j], -lr_power_)) / lr_ * var[j];
+        linear[j] += summed_grad - (std::pow(accum_new, -lr_power_) - std::pow(accum[j], -lr_power_)) / lr_ * var[j];
       }
       auto x = Sign(linear[j]) * l1_ - linear[j];
       float y;
       if (lr_power_ == -0.5) {
-        y = sqrt(accum_new) / lr_ + 2 * l2_;
+        y = std::sqrt(accum_new) / lr_ + 2 * l2_;
       } else {
-        y = pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
+        y = std::pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
       }
       auto pre_shrink = x / y;
-      var[j] = abs(linear[j]) > l1_ ? pre_shrink : 0;
+      var[j] = std::fabs(linear[j]) > l1_ ? pre_shrink : 0;
       accum[j] = accum_new;
     }
   }
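For reference, the per-row update in the loop above is equivalent to the following scalar sketch. FtrlRowUpdate is an illustration only: lr, l1, l2 and lr_power stand in for the kernel members lr_, l1_, l2_ and lr_power_, and the lr_power_ == -0.5 branches in the kernel are just the sqrt fast path of pow.

#include <cmath>

// Scalar sketch of one FTRL-proximal row update as performed above (illustration, not kernel code).
inline void FtrlRowUpdate(float *var, float *accum, float *linear, float summed_grad, float lr, float l1, float l2,
                          float lr_power) {
  float accum_new = *accum + summed_grad * summed_grad;
  // std::pow(x, 0.5) equals std::sqrt(x), so this covers the lr_power == -0.5 case too.
  float sigma = (std::pow(accum_new, -lr_power) - std::pow(*accum, -lr_power)) / lr;
  *linear += summed_grad - sigma * (*var);
  float sign = (*linear > 0) ? 1.0f : ((*linear < 0) ? -1.0f : 0.0f);  // same result as the Sign() helper
  float x = sign * l1 - *linear;
  float y = std::pow(accum_new, -lr_power) / lr + 2 * l2;
  *var = std::fabs(*linear) > l1 ? x / y : 0;
  *accum = accum_new;
}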

View File

@@ -0,0 +1,95 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include "common/common_test.h"
+#include "kernel/common_utils.h"
+
+namespace mindspore {
+namespace kernel {
+class CommonUtilTest : public UT::Common {
+ public:
+  CommonUtilTest() = default;
+};
+
+TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest1) {
+  // The indices is a vector and the grad is a tensor with shape (6, 2)
+  /* 0
+   * 0
+   * 1
+   * 1
+   * 0
+   * 3
+   */
+  std::vector<int> indices{0, 0, 1, 1, 0, 3};
+  /* 0 1
+   * 2 3
+   * 4 5
+   * 6 7
+   * 8 9
+   * 10 11
+   */
+  std::vector<float> grad;
+  for (int i = 0; i < 6 * 2; i++) {
+    grad.push_back(i);
+  }
+  std::vector<int> unique_indices(3);
+  std::vector<float> summed_grad(6);
+  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
+  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
+  EXPECT_EQ(unique_grad.indices_size_, 3);
+  EXPECT_EQ(unique_indices, std::vector<int>({0, 1, 3}));
+  /* 10 13
+   * 10 12
+   * 10 11
+   */
+  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12, 10, 11}));
+}
+
+TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest2) {
+  // The indices is a vector and the grad is a tensor with shape (6, 2)
+  /* 0
+   * 0
+   * 1
+   * 1
+   * 0
+   * 6
+   */
+  std::vector<int> indices{0, 0, 1, 1, 0, 6};
+  /* 0 1
+   * 2 3
+   * 4 5
+   * 6 7
+   * 8 9
+   * 10 11
+   */
+  std::vector<float> grad;
+  for (int i = 0; i < 6 * 2; i++) {
+    grad.push_back(i);
+  }
+  std::vector<int> unique_indices(2);
+  std::vector<float> summed_grad(4);
+  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
+  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
+  EXPECT_EQ(unique_grad.indices_size_, 2);
+  EXPECT_EQ(unique_indices, std::vector<int>({0, 1}));
+  /* 10 13
+   * 10 12
+   */
+  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12}));
+}
+}  // namespace kernel
+}  // namespace mindspore