forked from mindspore-Ecosystem/mindspore
!1867 Add unique process for duplicated indices in cpu kernel
Merge pull request !1867 from YuJianfeng/master
This commit is contained in:
commit
f1199f2100
|
@ -547,5 +547,38 @@ int Sign(float x) {
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Collapses a sparse gradient so that every row index appears at most once,
// summing the value rows of all slices that share an index.
//
// origin_sparse_grad: input slices. indices_[i] is the row index of slice i and
//                     value_[i * outer_dim .. (i + 1) * outer_dim) is its row.
// unique_grad:        output. indices_ receives the distinct in-range indices in
//                     order of first appearance, value_ receives the summed rows
//                     in the same order, and indices_size_ is set to the number
//                     of distinct in-range indices.
// first_dim:          exclusive upper bound for a valid index.
// outer_dim:          number of floats in one value row.
//
// Slices whose index is negative or >= first_dim are skipped and contribute
// nothing to the output. No capacity check is performed on the output buffers:
// the caller must size unique_grad->indices_ / value_ for every distinct valid
// index (origin_sparse_grad.indices_size_ entries is always sufficient).
void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
                              size_t outer_dim) {
  MS_EXCEPTION_IF_NULL(origin_sparse_grad.value_);
  MS_EXCEPTION_IF_NULL(origin_sparse_grad.indices_);
  MS_EXCEPTION_IF_NULL(unique_grad);
  MS_EXCEPTION_IF_NULL(unique_grad->value_);
  MS_EXCEPTION_IF_NULL(unique_grad->indices_);
  // Maps a row index to the position of its row in the output.
  std::unordered_map<int, size_t> index_map;
  size_t unique_indices_size = 0;
  for (size_t i = 0; i < origin_sparse_grad.indices_size_; ++i) {
    const int index = origin_sparse_grad.indices_[i];
    if (index < 0 || static_cast<size_t>(index) >= first_dim) {
      continue;
    }
    // A single hash lookup: emplace inserts the next free output slot only if
    // the index has not been seen, and otherwise returns the existing entry.
    const auto inserted = index_map.emplace(index, unique_indices_size);
    const float *src_row = origin_sparse_grad.value_ + i * outer_dim;
    float *dst_row = unique_grad->value_ + inserted.first->second * outer_dim;
    if (inserted.second) {
      // First occurrence: record the index and copy its row.
      unique_grad->indices_[unique_indices_size] = index;
      for (size_t j = 0; j < outer_dim; ++j) {
        dst_row[j] = src_row[j];
      }
      ++unique_indices_size;
    } else {
      // Repeated index: accumulate into the row stored for its first occurrence.
      for (size_t j = 0; j < outer_dim; ++j) {
        dst_row[j] += src_row[j];
      }
    }
  }
  unique_grad->indices_size_ = unique_indices_size;
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -69,6 +69,12 @@ class KernelMeta {
|
|||
std::unordered_map<std::string, std::string> kernel_meta_map_;
|
||||
};
|
||||
|
||||
// Non-owning view of a sparse gradient: a list of row indices plus the value
// rows that belong to them. The struct never allocates or frees the buffers it
// points to. Members are default-initialized so that a default-constructed
// instance holds null pointers and a zero size instead of indeterminate values.
struct SparseGradient {
  // Gradient values, stored as consecutive rows (one row per entry of indices_).
  float *value_{nullptr};
  // Row index of each slice.
  int *indices_{nullptr};
  // Number of entries in indices_.
  size_t indices_size_{0};
};
|
||||
|
||||
bool CheckCache(const std::string &kernel_name);
|
||||
KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
|
||||
KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
|
||||
|
@ -84,6 +90,8 @@ void SaveJsonInfo(const std::string &json_name, const std::string &info);
|
|||
std::string GetProcessor(const AnfNodePtr &anf_node);
|
||||
bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
|
||||
int Sign(float x);
|
||||
void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
|
||||
size_t outer_dim);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -84,28 +84,35 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
|
|||
auto grad = reinterpret_cast<float *>(inputs[3]->addr);
|
||||
auto indices = reinterpret_cast<int *>(inputs[4]->addr);
|
||||
|
||||
for (size_t i = 0; i < indices_size_; ++i) {
|
||||
int index = indices[i];
|
||||
if ((size_t)index >= var_first_dim_size_) {
|
||||
MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range";
|
||||
std::vector<float> new_grad(indices_size_ * var_outer_dim_size_);
|
||||
std::vector<int> new_indices(indices_size_);
|
||||
SparseGradient unique_sparse_grad({new_grad.data(), new_indices.data(), indices_size_});
|
||||
DeduplicateIndexedSlices(SparseGradient({grad, indices, indices_size_}), &unique_sparse_grad, var_first_dim_size_,
|
||||
var_outer_dim_size_);
|
||||
|
||||
for (size_t i = 0; i < unique_sparse_grad.indices_size_; ++i) {
|
||||
int index = unique_sparse_grad.indices_[i];
|
||||
if (index < 0 || (size_t)index >= var_first_dim_size_) {
|
||||
MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process";
|
||||
}
|
||||
for (size_t j = var_outer_dim_size_ * index, k = var_outer_dim_size_ * i; j < var_outer_dim_size_ * (index + 1);
|
||||
++j, ++k) {
|
||||
auto accum_new = accum[j] + grad[k] * grad[k];
|
||||
auto summed_grad = unique_sparse_grad.value_[k];
|
||||
auto accum_new = accum[j] + summed_grad * summed_grad;
|
||||
if (lr_power_ == -0.5) {
|
||||
linear[j] += grad[k] - (sqrt(accum_new) - sqrt(accum[j])) / lr_ * var[j];
|
||||
linear[j] += summed_grad - (std::sqrt(accum_new) - std::sqrt(accum[j])) / lr_ * var[j];
|
||||
} else {
|
||||
linear[j] += grad[k] - (pow(accum_new, -lr_power_) - pow(accum[j], -lr_power_)) / lr_ * var[j];
|
||||
linear[j] += summed_grad - (std::pow(accum_new, -lr_power_) - std::pow(accum[j], -lr_power_)) / lr_ * var[j];
|
||||
}
|
||||
auto x = Sign(linear[j]) * l1_ - linear[j];
|
||||
float y;
|
||||
if (lr_power_ == -0.5) {
|
||||
y = sqrt(accum_new) / lr_ + 2 * l2_;
|
||||
y = std::sqrt(accum_new) / lr_ + 2 * l2_;
|
||||
} else {
|
||||
y = pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
|
||||
y = std::pow(accum_new, -lr_power_) / lr_ + 2 * l2_;
|
||||
}
|
||||
auto pre_shrink = x / y;
|
||||
var[j] = abs(linear[j]) > l1_ ? pre_shrink : 0;
|
||||
var[j] = std::fabs(linear[j]) > l1_ ? pre_shrink : 0;
|
||||
accum[j] = accum_new;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include "common/common_test.h"
|
||||
#include "kernel/common_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// Test fixture for the helpers declared in kernel/common_utils.h.
// It adds no state of its own on top of UT::Common.
class CommonUtilTest : public UT::Common {
 public:
  CommonUtilTest() = default;
};
|
||||
|
||||
// All indices are in range: duplicated indices must be merged and their rows summed.
TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest1) {
  // The indices are a vector of length 6 and the grad is a tensor with shape (6, 2).
  std::vector<int> indices{0, 0, 1, 1, 0, 3};
  /* grad rows, one per entry of indices:
   *  0  1
   *  2  3
   *  4  5
   *  6  7
   *  8  9
   * 10 11
   */
  std::vector<float> grad;
  for (int i = 0; i < 6 * 2; i++) {
    grad.push_back(static_cast<float>(i));
  }
  // Output buffers sized for the three distinct indices (0, 1, 3), two floats per row.
  std::vector<int> unique_indices(3);
  std::vector<float> summed_grad(6);
  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
  // Unsigned literal: indices_size_ is a size_t, so avoid a signed/unsigned comparison.
  EXPECT_EQ(unique_grad.indices_size_, 3U);
  EXPECT_EQ(unique_indices, std::vector<int>({0, 1, 3}));
  /* expected summed rows:
   * index 0: (0 + 2 + 8, 1 + 3 + 9) = 10 13
   * index 1: (4 + 6, 5 + 7)         = 10 12
   * index 3:                          10 11
   */
  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12, 10, 11}));
}
|
||||
|
||||
// One index is out of range: it must be dropped while the rest are merged as usual.
TEST_F(CommonUtilTest, DeduplicateIndexedSlicesTest2) {
  // The indices are a vector of length 6 and the grad is a tensor with shape (6, 2).
  // The last index (6) is not smaller than first_dim (6), so its slice is skipped.
  std::vector<int> indices{0, 0, 1, 1, 0, 6};
  /* grad rows, one per entry of indices:
   *  0  1
   *  2  3
   *  4  5
   *  6  7
   *  8  9
   * 10 11
   */
  std::vector<float> grad;
  for (int i = 0; i < 6 * 2; i++) {
    grad.push_back(static_cast<float>(i));
  }
  // Output buffers sized for the two distinct valid indices (0, 1), two floats per row.
  std::vector<int> unique_indices(2);
  std::vector<float> summed_grad(4);
  SparseGradient unique_grad({summed_grad.data(), unique_indices.data(), 0});
  DeduplicateIndexedSlices(SparseGradient({grad.data(), indices.data(), 6}), &unique_grad, 6, 2);
  // Unsigned literal: indices_size_ is a size_t, so avoid a signed/unsigned comparison.
  EXPECT_EQ(unique_grad.indices_size_, 2U);
  EXPECT_EQ(unique_indices, std::vector<int>({0, 1}));
  /* expected summed rows:
   * index 0: (0 + 2 + 8, 1 + 3 + 9) = 10 13
   * index 1: (4 + 6, 5 + 7)         = 10 12
   */
  EXPECT_EQ(summed_grad, std::vector<float>({10, 13, 10, 12}));
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
Loading…
Reference in New Issue