!1867 Add unique process for duplicated indices in cpu kernel

Merge pull request !1867 from YuJianfeng/master
mindspore-ci-bot 2020-06-09 14:11:26 +08:00 committed by Gitee
commit f1199f2100
4 changed files with 153 additions and 10 deletions
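In short: when the incoming sparse gradient repeats an index, the change first sums the gradient rows that share that index into a single unique row, and only then runs the FTRL update, so each row of var is touched once per launch with the accumulated gradient. Below is a minimal standalone sketch of that row-summing idea; SumRowsByIndex is a hypothetical illustration, not the kernel code, which does the same thing into preallocated flat buffers via the new DeduplicateIndexedSlices helper shown in the diff.

#include <cstddef>
#include <unordered_map>
#include <vector>

// Hypothetical illustration (not the kernel code): sum the rows of a row-major
// (indices.size() x outer_dim) gradient that share the same index.
std::unordered_map<int, std::vector<float>> SumRowsByIndex(const std::vector<int> &indices,
                                                           const std::vector<float> &grad, size_t outer_dim) {
  std::unordered_map<int, std::vector<float>> summed;
  for (size_t i = 0; i < indices.size(); ++i) {
    std::vector<float> &row = summed[indices[i]];
    row.resize(outer_dim, 0.0f);  // no-op after the first row for this index
    for (size_t j = 0; j < outer_dim; ++j) {
      row[j] += grad[i * outer_dim + j];
    }
  }
  return summed;
}
// Example: indices {0, 0, 1} with rows {0, 1}, {2, 3}, {4, 5} yields
// summed[0] == {2, 4} and summed[1] == {4, 5}.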

View File

@@ -547,5 +547,38 @@ int Sign(float x) {
   }
   return 0;
 }
+
+void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
+                              size_t outer_dim) {
+  MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
+  MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
+  MS_EXCEPTION_IF_NULL(unique_grad);
+  MS_EXCEPTION_IF_NULL(unique_grad->value_);
+  MS_EXCEPTION_IF_NULL(unique_grad->indices_);
+  std::unordered_map<int, size_t> index_map;
+  size_t unique_indices_size = 0;
+  for (size_t i = 0; i < origin_sparse_grad.indices_size_; ++i) {
+    int index = origin_sparse_grad.indices_[i];
+    if (index < 0 || (size_t)index >= first_dim) {
+      continue;
+    }
+    auto iter = index_map.find(index);
+    if (iter == index_map.end()) {
+      index_map[index] = unique_indices_size;
+      unique_grad->indices_[unique_indices_size] = index;
+      for (size_t j = unique_indices_size * outer_dim, k = i * outer_dim; j < (unique_indices_size + 1) * outer_dim;
+           ++j, ++k) {
+        unique_grad->value_[j] = origin_sparse_grad.value_[k];
+      }
+      unique_indices_size++;
+    } else {
+      size_t first_index = iter->second;
+      for (size_t j = first_index * outer_dim, k = i * outer_dim; j < (first_index + 1) * outer_dim; ++j, ++k) {
+        unique_grad->value_[j] += origin_sparse_grad.value_[k];
+      }
+    }
+  }
+  unique_grad->indices_size_ = unique_indices_size;
+}
 }  // namespace kernel
 }  // namespace mindspore

View File

@@ -69,6 +69,12 @@ class KernelMeta {
   std::unordered_map<std::string, std::string> kernel_meta_map_;
 };
 
+struct SparseGradient {
+  float *value_;
+  int *indices_;
+  size_t indices_size_;
+};
+
 bool CheckCache(const std::string &kernel_name);
 KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
 KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
@@ -84,6 +90,8 @@ void SaveJsonInfo(const std::string &json_name, const std::string &info);
 std::string GetProcessor(const AnfNodePtr &anf_node);
 bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
 int Sign(float x);
+void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
+                              size_t outer_dim);
 }  // namespace kernel
 }  // namespace mindspore

View File

@@ -84,28 +84,35 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
   auto grad = reinterpret_cast<float *>(inputs[3]->addr);
   auto indices = reinterpret_cast<int *>(inputs[4]->addr);
-  for (size_t i = 0; i < indices_size_; ++i) {
-    int index = indices[i];
-    if ((size_t)index >= var_first_dim_size_) {
-      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range";
+  std::vector<float> new_grad(indices_size_ * var_outer_dim_size_);
+  std::vector<int> new_indices(indices_size_);
+  SparseGradient unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size_});
+  DeduplicateIndexedSlices(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_,
+                           var_outer_dim_size_);
+  for (size_t i = 0; i < unique_sparse_grad.indices_size_; ++i) {
+    int index = unique_sparse_grad.indices_[i];
+    if (index < 0 || (size_t)index >= var_first_dim_size_) {
+      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process";
     }
     for (size_t j = var_outer_dim_size_ * index, k = var_outer_dim_size_ * i; j < var_outer_dim_size_ * (index + 1);
          ++j, ++k) {
-      auto accum_new = accum[j] + grad[k] * grad[k];
+      auto summed_grad = unique_sparse_grad.value_[k];
+      auto accum_new = accum[j] + summed_grad * summed_grad;
       if (lr_power_ == -0.5) {
-        linear[j] += grad[k] - (sqrt(accum_new) - sqrt(accum[j])) / lr_ * var[j];
+        linear[j] += summed_grad - (std::sqrt(accum_new) - std::sqrt(accum[j])) / lr_ * var[j];
       } else {
-        linear[j] += grad[k] - (pow(accum_new, -lr_power_) - pow(accum[j], -lr_power_)) / lr_ * var[j];
+        linear[j] += summed_grad - (std::pow(accum_new, -lr_power_) - std::pow(accum[j], -lr_power_)) / lr_ * var[j];
       }
       auto x = Sign(linear[j]) * l1_ - linear[j];
       float y;
       if (lr_power_ == -0.5) {
-        y = sqrt(accum_new) / lr_ + 2 * l2_;
+        y = std::sqrt(accum_new) / lr_ + 2 * l2_;
       } else {
-        y = pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
+        y = std::pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
       }
       auto pre_shrink = x / y;
-      var[j] = abs(linear[j]) > l1_ ? pre_shrink : 0;
+      var[j] = std::fabs(linear[j]) > l1_ ? pre_shrink : 0;
       accum[j] = accum_new;
     }
   }
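For reference, the per-row update in the loop above is equivalent to the following scalar sketch. FtrlRowUpdate is an illustration only: lr, l1, l2 and lr_power stand in for the kernel members lr_, l1_, l2_ and lr_power_, and the lr_power_ == -0.5 branches in the kernel are just the sqrt fast path of pow.

#include <cmath>

// Scalar sketch of one FTRL-proximal row update as performed above (illustration, not kernel code).
inline void FtrlRowUpdate(float *var, float *accum, float *linear, float summed_grad, float lr, float l1, float l2,
                          float lr_power) {
  float accum_new = *accum + summed_grad * summed_grad;
  // std::pow(x, 0.5) equals std::sqrt(x), so this covers the lr_power == -0.5 case too.
  float sigma = (std::pow(accum_new, -lr_power) - std::pow(*accum, -lr_power)) / lr;
  *linear += summed_grad - sigma * (*var);
  float sign = (*linear > 0) ? 1.0f : ((*linear < 0) ? -1.0f : 0.0f);  // same result as the Sign() helper
  float x = sign * l1 - *linear;
  float y = std::pow(accum_new, -lr_power) / lr + 2 * l2;
  *var = std::fabs(*linear) > l1 ? x / y : 0;
  *accum = accum_new;
}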

View File

@@ -0,0 +1,95 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include "common/common_test.h"
+#include "kernel/common_utils.h"
+
+namespace mindspore {
+namespace kernel {
+class CommonUtilTest : public UT::Common {
+ public:
+  CommonUtilTest() = default;
+};
+
+TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest1) {
+  // The indices is a vector and the grad is a tensor with shape (6, 2)
+  /* 0
+   * 0
+   * 1
+   * 1
+   * 0
+   * 3
+   */
+  std::vector<int> indices{0, 0, 1, 1, 0, 3};
+  /* 0 1
+   * 2 3
+   * 4 5
+   * 6 7
+   * 8 9
+   * 10 11
+   */
+  std::vector<float> grad;
+  for (int i = 0; i < 6 * 2; i++) {
+    grad.push_back(i);
+  }
+  std::vector<int> unique_indices(3);
+  std::vector<float> summed_grad(6);
+  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
+  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
+  EXPECT_EQ(unique_grad.indices_size_, 3);
+  EXPECT_EQ(unique_indices, std::vector<int>({0, 1, 3}));
+  /* 10 13
+   * 10 12
+   * 10 11
+   */
+  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12, 10, 11}));
+}
+
+TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest2) {
+  // The indices is a vector and the grad is a tensor with shape (6, 2)
+  /* 0
+   * 0
+   * 1
+   * 1
+   * 0
+   * 6
+   */
+  std::vector<int> indices{0, 0, 1, 1, 0, 6};
+  /* 0 1
+   * 2 3
+   * 4 5
+   * 6 7
+   * 8 9
+   * 10 11
+   */
+  std::vector<float> grad;
+  for (int i = 0; i < 6 * 2; i++) {
+    grad.push_back(i);
+  }
+  std::vector<int> unique_indices(2);
+  std::vector<float> summed_grad(4);
+  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
+  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
+  EXPECT_EQ(unique_grad.indices_size_, 2);
+  EXPECT_EQ(unique_indices, std::vector<int>({0, 1}));
+  /* 10 13
+   * 10 12
+   */
+  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12}));
+}
+}  // namespace kernel
+}  // namespace mindspore