!8106 add cpu adam delta

Merge pull request !8106 from kisnwang/add-cpu-adam-delta
mindspore-ci-bot 2020-11-06 17:30:24 +08:00 committed by Gitee
commit 073d991de4
4 changed files with 324 additions and 0 deletions
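
For reference, the update computed by the new kernel is a standard Adam step with the bias
correction folded into the learning rate (see ComputeWeightDelta in the first file):

\[
\begin{aligned}
m_t &= \beta_1\, m_{t-1} + (1 - \beta_1)\, g_t \\
v_t &= \beta_2\, v_{t-1} + (1 - \beta_2)\, g_t^2 \\
\widehat{lr} &= lr \cdot \sqrt{1 - \beta_2^{\,t}} \,/\, (1 - \beta_1^{\,t}) \\
\Delta_t &= -\widehat{lr} \cdot m_t \,/\, (\sqrt{v_t} + \epsilon)
\end{aligned}
\]

With use_nesterov=true, the numerator m_t is replaced by \beta_1 m_t + (1 - \beta_1)\, g_t.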

mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc (new file)

@@ -0,0 +1,174 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h"
#include <cmath>
#include <thread>
#include <vector>
#include <string>
#include <memory>
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
constexpr size_t kAdamDeltaInputSize = 9;
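// Upper bound on the number of worker threads spawned by Launch.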
constexpr size_t kUsedThreadNum = 23;
namespace {
struct ComputeParam {
float *delta_{nullptr};
float *m_{nullptr};
float *v_{nullptr};
float *grad_{nullptr};
float beta1_{0};
float beta2_{0};
float epsilon_{0};
float lr_{0};
bool use_nesterov_{false};
};
void ComputeWeightDelta(const std::shared_ptr<ComputeParam> &input_params, size_t start, size_t end) {
MS_EXCEPTION_IF_NULL(input_params);
MS_EXCEPTION_IF_NULL(input_params->delta_);
MS_EXCEPTION_IF_NULL(input_params->m_);
MS_EXCEPTION_IF_NULL(input_params->v_);
MS_EXCEPTION_IF_NULL(input_params->grad_);
auto delta = input_params->delta_;
auto m = input_params->m_;
auto v = input_params->v_;
auto lr = input_params->lr_;
auto beta1 = input_params->beta1_;
auto beta2 = input_params->beta2_;
auto epsilon = input_params->epsilon_;
auto use_nesterov = input_params->use_nesterov_;
auto grad = input_params->grad_;
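// Element-wise Adam update over [start, end): refresh the first and second
// moments in place, then write the weight delta instead of applying it to the
// weights. With use_nesterov, the delta uses a look-ahead mix of the updated
// first moment and the current gradient.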
for (size_t i = start; i < end; ++i) {
m[i] *= beta1;
v[i] *= beta2;
m[i] += (1 - beta1) * grad[i];
v[i] += (1 - beta2) * grad[i] * grad[i];
if (use_nesterov) {
delta[i] = -lr * (m[i] * beta1 + (1 - beta1) * grad[i]) / (std::sqrt(v[i]) + epsilon);
} else {
delta[i] = -lr * m[i] / (std::sqrt(v[i]) + epsilon);
}
}
}
} // namespace
void AdamDeltaCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
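// Input order: 0 m, 1 v, 2 beta1_power, 3 beta2_power, 4 lr, 5 beta1, 6 beta2, 7 epsilon, 8 grad.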
std::vector<size_t> delta_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
std::vector<size_t> m_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> v_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> grad_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 8);
if (!IsSameShape(delta_shape, m_shape)) {
MS_LOG(EXCEPTION) << "Delta and m should have the same shape";
}
if (!IsSameShape(delta_shape, v_shape)) {
MS_LOG(EXCEPTION) << "Delta and v should have the same shape";
}
if (!IsSameShape(delta_shape, grad_shape)) {
MS_LOG(EXCEPTION) << "Delta and grad should have the same shape";
}
if (delta_shape.empty()) {
MS_LOG(EXCEPTION) << "Delta must be at least 1D";
}
elem_num_ = 1;
for (size_t i = 0; i < delta_shape.size(); ++i) {
elem_num_ *= delta_shape[i];
}
if (elem_num_ < 1) {
MS_LOG(EXCEPTION) << "Invalid delta shape";
}
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, USE_NESTEROV);
}
}
void AdamDeltaCPUKernel::CheckParams(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) const {
if (inputs.size() != kAdamDeltaInputSize) {
MS_LOG(EXCEPTION) << "AdamDelta expects " << kAdamDeltaInputSize << " inputs, but got " << inputs.size();
}
size_t elem_size = elem_num_ * sizeof(float);
std::vector<size_t> expect_sizes = {elem_size, elem_size, sizeof(float), sizeof(float), sizeof(float),
sizeof(float), sizeof(float), sizeof(float), elem_size};
std::vector<std::string> input_names = {"m", "v", "beta1_power", "beta2_power", "lr",
"beta1", "beta2", "epsilon", "grad"};
for (size_t i = 0; i < kAdamDeltaInputSize; ++i) {
if (inputs[i]->size != expect_sizes[i]) {
MS_LOG(EXCEPTION) << "Unexpected size for input " << input_names[i];
}
}
if (outputs.empty() || outputs[0]->size != elem_size) {
MS_LOG(EXCEPTION) << "Unexpected size for output delta";
}
}
bool AdamDeltaCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
CheckParams(inputs, workspace, outputs);
auto m = reinterpret_cast<float *>(inputs[0]->addr);
auto v = reinterpret_cast<float *>(inputs[1]->addr);
auto beta1_power = reinterpret_cast<float *>(inputs[2]->addr)[0];
if (beta1_power == 1) {
MS_LOG(EXCEPTION) << "The beta1_power should not be 1";
}
auto beta2_power = reinterpret_cast<float *>(inputs[3]->addr)[0];
auto lr = reinterpret_cast<float *>(inputs[4]->addr)[0];
auto beta1 = reinterpret_cast<float *>(inputs[5]->addr)[0];
auto beta2 = reinterpret_cast<float *>(inputs[6]->addr)[0];
auto epsilon = reinterpret_cast<float *>(inputs[7]->addr)[0];
auto grad = reinterpret_cast<float *>(inputs[8]->addr);
auto delta = reinterpret_cast<float *>(outputs[0]->addr);
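// Fold Adam's bias correction into the learning rate: lr_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t).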
lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);
size_t thread_num = kUsedThreadNum;
if (elem_num_ < thread_num) {
thread_num = elem_num_;
}
std::vector<std::thread> threads;
std::vector<std::shared_ptr<ComputeParam>> thread_params;
threads.reserve(thread_num);
size_t end = 0;
size_t offset = elem_num_ / thread_num;
size_t left = elem_num_ % thread_num;
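// Split the elements into near-equal chunks; the first `left` threads take one extra element.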
for (size_t i = 0; i < thread_num; ++i) {
auto params = std::make_shared<ComputeParam>();
params->delta_ = delta;
params->m_ = m;
params->v_ = v;
params->grad_ = grad;
params->beta1_ = beta1;
params->beta2_ = beta2;
params->use_nesterov_ = use_nesterov_;
params->lr_ = lr;
params->epsilon_ = epsilon;
size_t start = end;
end = start + offset;
if (i < left) {
end += 1;
}
threads.emplace_back(ComputeWeightDelta, params, start, end);
thread_params.emplace_back(params);
}
for (size_t i = 0; i < thread_num; ++i) {
threads[i].join();
}
return true;
}
} // namespace kernel
} // namespace mindspore

mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h (new file)

@@ -0,0 +1,56 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class AdamDeltaCPUKernel : public CPUKernel {
public:
AdamDeltaCPUKernel() = default;
~AdamDeltaCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
protected:
void CheckParams(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) const;
bool use_nesterov_{false};
size_t elem_num_{0};
};
// Nine float32 inputs (m, v, beta1_power, beta2_power, lr, beta1, beta2,
// epsilon, grad) and one float32 output (delta), matching kAdamDeltaInputSize.
MS_REG_CPU_KERNEL(AdamDelta,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
AdamDeltaCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_

@@ -110,6 +110,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/akg/*.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/rts/*.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/hccl/*.cc"

@@ -0,0 +1,93 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <memory>
#include <vector>
#include "common/common_test.h"
#define private public
#define protected public
#include "backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h"
#undef private
#undef protected
namespace mindspore {
namespace kernel {
class AdamDeltaCpuKernelTest : public UT::Common {
public:
AdamDeltaCpuKernelTest() : adam_delta_(std::make_shared<AdamDeltaCPUKernel>()) {}
void SetUp() override {
delta_.clear();
m_.clear();
v_.clear();
grad_.clear();
inputs_.clear();
workspace_.clear();
outputs_.clear();
}
AddressPtr CreateKernelAddress(void *addr, size_t elem_num) {
auto kernel_addr = std::make_shared<Address>();
kernel_addr->addr = addr;
kernel_addr->size = elem_num * sizeof(float);
return kernel_addr;
}
void CreateAddress() {
inputs_.push_back(CreateKernelAddress(m_.data(), elem_num_));
inputs_.push_back(CreateKernelAddress(v_.data(), elem_num_));
inputs_.push_back(CreateKernelAddress(&beta1_power_, 1));
inputs_.push_back(CreateKernelAddress(&beta2_power_, 1));
inputs_.push_back(CreateKernelAddress(&lr_, 1));
inputs_.push_back(CreateKernelAddress(&beta1_, 1));
inputs_.push_back(CreateKernelAddress(&beta2_, 1));
inputs_.push_back(CreateKernelAddress(&epsilon_, 1));
inputs_.push_back(CreateKernelAddress(grad_.data(), elem_num_));
outputs_.push_back(CreateKernelAddress(delta_.data(), elem_num_));
}
std::vector<float> delta_;
std::vector<float> m_;
std::vector<float> v_;
std::vector<float> grad_;
std::vector<AddressPtr> inputs_;
std::vector<AddressPtr> workspace_;
std::vector<AddressPtr> outputs_;
std::shared_ptr<AdamDeltaCPUKernel> adam_delta_;
float beta1_power_ = 0.9f;
float beta2_power_ = 0.999f;
float lr_ = 0.001f;
float beta1_ = 0.9f;
float beta2_ = 0.999f;
float epsilon_ = 1e-8f;
size_t elem_num_ = 27;
};
TEST_F(AdamDeltaCpuKernelTest, compute_test) {
for (size_t i = 0; i < elem_num_; ++i) {
delta_.push_back(1.0);
m_.push_back(1.0);
v_.push_back(1.0);
grad_.push_back(1.0);
}
adam_delta_->elem_num_ = elem_num_;
CreateAddress();
adam_delta_->Launch(inputs_, workspace_, outputs_);
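// With m = v = grad = 1: m' = 0.9 + 0.1 = 1, v' = 0.999 + 0.001 = 1, and
// lr_t = 0.001 * sqrt(1 - 0.999) / (1 - 0.9) ≈ 3.16228e-4, so each delta ≈ -3.16228e-4.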
for (size_t i = 0; i < elem_num_; ++i) {
EXPECT_TRUE(std::fabs(delta_[i] + 0.000316) < 1e-6);
}
}
} // namespace kernel
} // namespace mindspore