forked from mindspore-Ecosystem/mindspore
!8106 add cpu adam delta
Merge pull request !8106 from kisnwang/add-cpu-adam-delta
This commit is contained in:
commit
073d991de4
|
@ -0,0 +1,174 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h"
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
constexpr size_t kAdamDeltaInputSize = 9;
|
||||
constexpr size_t kUsedThreadNum = 23;
|
||||
namespace {
|
||||
// Per-thread view of the Adam update. Pointers are non-owning slices into the
// kernel's input/output buffers; the caller guarantees they outlive the
// worker threads that read them.
struct ComputeParam {
  float *delta_{nullptr};  // output: computed weight delta
  float *m_{nullptr};      // first-moment estimate, updated in place
  float *v_{nullptr};      // second-moment estimate, updated in place
  float *grad_{nullptr};   // input gradient (read-only)
  float beta1_{0};         // decay rate for m_
  float beta2_{0};         // decay rate for v_
  float epsilon_{0};       // numerical-stability term added to sqrt(v)
  float lr_{0};            // learning rate (bias-corrected by the caller)
  // Fixed: was brace-initialized with the int literal 0; use the bool literal.
  bool use_nesterov_{false};  // apply the Nesterov-momentum variant when true
};
|
||||
|
||||
// Applies one Adam step to elements [start, end) and writes the resulting
// weight delta. Threads operating on disjoint ranges may run concurrently.
void ComputeWeightDelta(const std::shared_ptr<ComputeParam> &input_params, size_t start, size_t end) {
  MS_EXCEPTION_IF_NULL(input_params);
  MS_EXCEPTION_IF_NULL(input_params->delta_);
  MS_EXCEPTION_IF_NULL(input_params->m_);
  MS_EXCEPTION_IF_NULL(input_params->v_);
  MS_EXCEPTION_IF_NULL(input_params->grad_);
  float *delta = input_params->delta_;
  float *m = input_params->m_;
  float *v = input_params->v_;
  float *grad = input_params->grad_;
  const float lr = input_params->lr_;
  const float beta1 = input_params->beta1_;
  const float beta2 = input_params->beta2_;
  const float epsilon = input_params->epsilon_;
  const bool use_nesterov = input_params->use_nesterov_;
  const float one_minus_beta1 = 1 - beta1;
  const float one_minus_beta2 = 1 - beta2;
  for (size_t i = start; i < end; ++i) {
    // Decay the running moments, then blend in the current gradient.
    m[i] = m[i] * beta1 + one_minus_beta1 * grad[i];
    v[i] = v[i] * beta2 + one_minus_beta2 * grad[i] * grad[i];
    // Nesterov variant looks one momentum step ahead in the numerator.
    const float numerator = use_nesterov ? (m[i] * beta1 + one_minus_beta1 * grad[i]) : m[i];
    delta[i] = -lr * numerator / (std::sqrt(v[i]) + epsilon);
  }
}
|
||||
} // namespace
|
||||
|
||||
void AdamDeltaCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
std::vector<size_t> delta_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
|
||||
std::vector<size_t> m_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
std::vector<size_t> v_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
|
||||
std::vector<size_t> grad_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 8);
|
||||
if (!IsSameShape(delta_shape, m_shape)) {
|
||||
MS_LOG(EXCEPTION) << "Delta and m should have the same shape";
|
||||
}
|
||||
if (!IsSameShape(delta_shape, v_shape)) {
|
||||
MS_LOG(EXCEPTION) << "Delta and v should have the same shape";
|
||||
}
|
||||
if (!IsSameShape(delta_shape, grad_shape)) {
|
||||
MS_LOG(EXCEPTION) << "Delta and grad should have the same shape";
|
||||
}
|
||||
if (delta_shape.empty()) {
|
||||
MS_LOG(EXCEPTION) << "Delta must be at least 1D";
|
||||
}
|
||||
elem_num_ = 1;
|
||||
for (size_t i = 0; i < delta_shape.size(); ++i) {
|
||||
elem_num_ *= delta_shape[i];
|
||||
}
|
||||
if (elem_num_ < 1) {
|
||||
MS_LOG(EXCEPTION) << "Invalid delta shape";
|
||||
}
|
||||
if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
|
||||
use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "use_nesterov");
|
||||
}
|
||||
}
|
||||
|
||||
// Validates input/output buffer sizes before Launch touches any memory.
// m, v and grad must each hold elem_num_ floats; the remaining inputs
// (beta1_power, beta2_power, lr, beta1, beta2, epsilon) are float scalars.
// Throws (MS_LOG(EXCEPTION)) on any mismatch; `workspace` is unused.
void AdamDeltaCPUKernel::CheckParams(const std::vector<kernel::AddressPtr> &inputs,
                                     const std::vector<kernel::AddressPtr> &workspace,
                                     const std::vector<kernel::AddressPtr> &outputs) const {
  if (inputs.size() != kAdamDeltaInputSize) {
    MS_LOG(EXCEPTION) << "Error input size!";
  }
  // Fixed: spell out sizeof(float) instead of the magic number 4.
  size_t elem_size = elem_num_ * sizeof(float);
  size_t scalar_size = sizeof(float);
  std::vector<size_t> expect_sizes = {elem_size,   elem_size,   scalar_size, scalar_size, scalar_size,
                                      scalar_size, scalar_size, scalar_size, elem_size};
  std::vector<std::string> input_names = {"m", "v", "beta1_power", "beta2_power", "lr",
                                          "beta1", "beta2", "epsilon", "grad"};
  for (size_t i = 0; i < kAdamDeltaInputSize; ++i) {
    if (inputs[i]->size != expect_sizes[i]) {
      MS_LOG(EXCEPTION) << "Error input " << input_names[i] << " size!";
    }
  }
  if (outputs.size() < 1 || outputs[0]->size != elem_size) {
    MS_LOG(EXCEPTION) << "Error output delta size!";
  }
}
|
||||
|
||||
// Computes the Adam weight delta: validates buffers, folds the bias
// correction into lr, then fans the per-element update out over up to
// kUsedThreadNum worker threads. Returns true on success; size/shape
// problems throw via CheckParams.
bool AdamDeltaCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                const std::vector<kernel::AddressPtr> &workspace,
                                const std::vector<kernel::AddressPtr> &outputs) {
  CheckParams(inputs, workspace, outputs);
  auto m = reinterpret_cast<float *>(inputs[0]->addr);
  auto v = reinterpret_cast<float *>(inputs[1]->addr);
  auto beta1_power = reinterpret_cast<float *>(inputs[2]->addr)[0];
  // beta1_power == 1 would zero the bias-correction denominator below.
  if (beta1_power == 1) {
    MS_LOG(EXCEPTION) << "The beta1_power should not be 1";
  }
  auto beta2_power = reinterpret_cast<float *>(inputs[3]->addr)[0];
  auto lr = reinterpret_cast<float *>(inputs[4]->addr)[0];
  auto beta1 = reinterpret_cast<float *>(inputs[5]->addr)[0];
  auto beta2 = reinterpret_cast<float *>(inputs[6]->addr)[0];
  auto epsilon = reinterpret_cast<float *>(inputs[7]->addr)[0];
  auto grad = reinterpret_cast<float *>(inputs[8]->addr);
  auto delta = reinterpret_cast<float *>(outputs[0]->addr);

  // Fold the Adam bias correction into the learning rate once, so the
  // workers only perform the moment update and division.
  lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);

  // Split [0, elem_num_) into near-equal contiguous chunks, one per thread.
  size_t thread_num = kUsedThreadNum;
  if (elem_num_ < thread_num) {
    thread_num = elem_num_;
  }
  if (thread_num < 1) {
    // Fixed: elem_num_ == 0 previously made thread_num 0 and the
    // elem_num_ / thread_num below divide by zero.
    thread_num = 1;
  }
  std::vector<std::thread> threads;
  threads.reserve(thread_num);

  size_t end = 0;
  size_t offset = elem_num_ / thread_num;
  size_t left = elem_num_ % thread_num;
  for (size_t i = 0; i < thread_num; ++i) {
    auto params = std::make_shared<ComputeParam>();
    params->delta_ = delta;
    params->m_ = m;
    params->v_ = v;
    params->grad_ = grad;
    params->beta1_ = beta1;
    params->beta2_ = beta2;
    params->use_nesterov_ = use_nesterov_;
    params->lr_ = lr;
    params->epsilon_ = epsilon;
    size_t start = end;
    end = start + offset;
    if (i < left) {
      end += 1;  // spread the remainder over the first `left` threads
    }
    // std::thread decay-copies `params`, so the shared_ptr stays alive for
    // the worker's lifetime — no separate bookkeeping vector needed.
    threads.emplace_back(ComputeWeightDelta, params, start, end);
  }
  for (auto &worker : threads) {
    worker.join();
  }
  return true;
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,56 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// CPU kernel computing the Adam optimizer weight delta.
// Inputs (9, all float32): m, v, beta1_power, beta2_power, lr, beta1, beta2,
// epsilon, grad. Output (float32): delta, same shape as m/v/grad.
class AdamDeltaCPUKernel : public CPUKernel {
 public:
  AdamDeltaCPUKernel() = default;
  ~AdamDeltaCPUKernel() override = default;
  // Reads shapes from the graph node, validates that delta/m/v/grad agree,
  // and caches elem_num_ and the optional use_nesterov attribute.
  void InitKernel(const CNodePtr &kernel_node) override;
  // Validates buffer sizes then computes delta across worker threads.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 protected:
  // Throws when any input/output buffer size disagrees with elem_num_.
  void CheckParams(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                   const std::vector<AddressPtr> &outputs) const;
  bool use_nesterov_{false};  // Nesterov-momentum variant flag (node attr)
  size_t elem_num_{0};        // element count of delta/m/v/grad
};
|
||||
|
||||
// Register the AdamDelta CPU kernel with nine float32 inputs
// (m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) and one
// float32 output (delta).
// Fixed: the registration listed TEN AddInputAttr entries, but the kernel
// enforces kAdamDeltaInputSize == 9 in CheckParams and only reads inputs
// 0-8, so the tenth input attribute was inconsistent — removed.
MS_REG_CPU_KERNEL(AdamDelta,
                  KernelAttr()
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddInputAttr(kNumberTypeFloat32)
                    .AddOutputAttr(kNumberTypeFloat32),
                  AdamDeltaCPUKernel);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ADAM_DELTA_CPU_KERNEL_H_
|
|
@ -110,6 +110,7 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/akg/*.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/rts/*.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/hccl/*.cc"
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include "common/common_test.h"
|
||||
#define private public
|
||||
#define protected public
|
||||
#include "backend/kernel_compiler/cpu/adam_delta_cpu_kernel.h"
|
||||
#undef private
|
||||
#undef protected
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class AdamDeltaCpuKernelTest : public UT::Common {
|
||||
public:
|
||||
AdamDeltaCpuKernelTest() : adam_delta_(std::make_shared<AdamDeltaCPUKernel>()) {}
|
||||
|
||||
void SetUp() override {
|
||||
delta_.clear();
|
||||
m_.clear();
|
||||
v_.clear();
|
||||
grad_.clear();
|
||||
inputs_.clear();
|
||||
workspace_.clear();
|
||||
outputs_.clear();
|
||||
}
|
||||
|
||||
AddressPtr CreateKernelAddress(void *addr, size_t elem_num) {
|
||||
auto kernel_addr = std::make_shared<Address>();
|
||||
kernel_addr->addr = addr;
|
||||
kernel_addr->size = elem_num * 4;
|
||||
return kernel_addr;
|
||||
}
|
||||
|
||||
void CreateAddress() {
|
||||
inputs_.push_back(CreateKernelAddress(m_.data(), elem_num_));
|
||||
inputs_.push_back(CreateKernelAddress(v_.data(), elem_num_));
|
||||
inputs_.push_back(CreateKernelAddress(&beta1_power_, 1));
|
||||
inputs_.push_back(CreateKernelAddress(&beta2_power_, 1));
|
||||
inputs_.push_back(CreateKernelAddress(&lr_, 1));
|
||||
inputs_.push_back(CreateKernelAddress(&beta1_, 1));
|
||||
inputs_.push_back(CreateKernelAddress(&beta2_, 1));
|
||||
inputs_.push_back(CreateKernelAddress(&epsilon_, 1));
|
||||
inputs_.push_back(CreateKernelAddress(grad_.data(), elem_num_));
|
||||
outputs_.push_back(CreateKernelAddress(delta_.data(), elem_num_));
|
||||
}
|
||||
|
||||
std::vector<float> delta_;
|
||||
std::vector<float> m_;
|
||||
std::vector<float> v_;
|
||||
std::vector<float> grad_;
|
||||
std::vector<AddressPtr> inputs_;
|
||||
std::vector<AddressPtr> workspace_;
|
||||
std::vector<AddressPtr> outputs_;
|
||||
std::shared_ptr<AdamDeltaCPUKernel> adam_delta_;
|
||||
float beta1_power_ = 0.9;
|
||||
float beta2_power_ = 0.999;
|
||||
float lr_ = 0.001;
|
||||
float beta1_ = 0.9;
|
||||
float beta2_ = 0.999;
|
||||
float epsilon_ = 1e-8;
|
||||
size_t elem_num_ = 27;
|
||||
};
|
||||
|
||||
// Runs one Adam step on 27 elements all initialized to 1.0 and checks each
// output element. With m = v = grad = 1: m' = 0.9 + 0.1 = 1, v' = 1,
// lr' = 0.001 * sqrt(1 - 0.999) / (1 - 0.9), so
// delta = -lr' * m' / (sqrt(v') + eps) ≈ -0.000316.
TEST_F(AdamDeltaCpuKernelTest, compute_test) {
  delta_.assign(elem_num_, 1.0);
  m_.assign(elem_num_, 1.0);
  v_.assign(elem_num_, 1.0);
  grad_.assign(elem_num_, 1.0);
  adam_delta_->elem_num_ = elem_num_;
  CreateAddress();
  adam_delta_->Launch(inputs_, workspace_, outputs_);
  for (size_t i = 0; i < elem_num_; ++i) {
    EXPECT_TRUE(std::fabs(delta_[i] + 0.000316) < 1e-6);
  }
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
Loading…
Reference in New Issue