added SGD operation in CPU

This commit is contained in:
huangbo77 2021-04-12 15:16:57 +08:00
parent b6605f5939
commit cb54c6efae
4 changed files with 228 additions and 1 deletions

View File

@ -0,0 +1,88 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/sgd_cpu_kernel.h"
#include <thread>
#include <vector>
namespace mindspore {
namespace kernel {
namespace {
// Expected number of input tensors: params, grad, lr, accum, momentum, stat.
constexpr size_t kInputSize{6};
// Expected number of output tensors: the parameter tensor.
constexpr size_t kOutputSize{1};
}  // namespace
/**
 * Reads the SGD hyper-parameters stored as attributes on the kernel node.
 *
 * @param kernel_node Node carrying the "dampening", "weight_decay" and
 *                    "nesterov" attributes; must not be null.
 */
template <typename T>
void SGDCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  // Float attributes first, then the boolean flag (same order as before).
  this->dampening_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "dampening");
  this->weight_decay_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "weight_decay");
  this->nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "nesterov");
}
/**
 * Validates the number of input and output buffers handed to Launch.
 *
 * Logs at EXCEPTION level when the input count differs from kInputSize or the
 * output count differs from kOutputSize.
 *
 * @param inputs  Expected order: params, grad, lr, accum, momentum, stat.
 * @param outputs Expected to hold a single entry: param.
 */
template <typename T>
void SGDCPUKernel<T>::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
  const size_t input_count = inputs.size();
  if (input_count != kInputSize) {
    MS_LOG(EXCEPTION) << "Input number is " << input_count << ", but SGD needs 6 inputs.";
  }

  const size_t output_count = outputs.size();
  if (output_count != kOutputSize) {
    MS_LOG(EXCEPTION) << "Output number is " << output_count << ", but SGD needs 1 outputs.";
  }
}
/**
 * Applies one SGD update step, in place, to every element of the parameter buffer.
 *
 * Input layout (validated by CheckParam):
 *   inputs[0] params   - updated in place
 *   inputs[1] grad     - read per element
 *   inputs[2] lr       - only element 0 is read
 *   inputs[3] accum    - momentum buffer, updated in place when momentum > 0
 *   inputs[4] momentum - only element 0 is read
 *   inputs[5] stat     - per-element flag; a value > 0 makes accum take the
 *                        gradient directly, after which the flag is cleared to 0
 *
 * @param inputs  The six input buffers described above.
 * @param outputs Exactly one output buffer (count checked by CheckParam).
 * @return Always true; a wrong input/output count is reported through MS_LOG(EXCEPTION).
 */
template <typename T>
bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
                             const std::vector<AddressPtr> &outputs) {
  CheckParam(inputs, outputs);
  auto param = reinterpret_cast<T *>(inputs[0]->addr);
  auto grad = reinterpret_cast<T *>(inputs[1]->addr);
  auto lr = reinterpret_cast<T *>(inputs[2]->addr);
  auto accum = reinterpret_cast<T *>(inputs[3]->addr);
  auto momentum = reinterpret_cast<T *>(inputs[4]->addr);
  auto stat = reinterpret_cast<T *>(inputs[5]->addr);
  // NOTE(review): outputs[0] is never written here; the update is only visible
  // through the in-place params buffer. Confirm that callers expect this.

  // The element count must come from the kernel's element type T. Dividing by
  // sizeof(float) would update only half of the elements for float16.
  size_t elem_num = inputs[0]->size / sizeof(T);
  // Loop-invariant scale applied to the gradient when accumulating momentum.
  const T grad_scale = static_cast<T>(1.0 - dampening_);
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      T grad_new = grad[i];
      if (weight_decay_ > 0) {
        // Weight decay: add weight_decay * param to the gradient.
        grad_new += param[i] * static_cast<T>(weight_decay_);
      }
      if (momentum[0] > static_cast<T>(0)) {
        if (stat[i] > static_cast<T>(0)) {
          // Flag set: seed the momentum buffer with the raw gradient and clear the flag.
          accum[i] = grad_new;
          stat[i] = static_cast<T>(0);
        } else {
          accum[i] = accum[i] * momentum[0] + grad_scale * grad_new;
        }
        if (nesterov_) {
          grad_new += accum[i] * momentum[0];
        } else {
          grad_new = accum[i];
        }
      }
      param[i] -= lr[0] * grad_new;
    }
  };
  CPUKernelUtils::ParallelFor(task, elem_num);
  return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,67 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SGD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SGD_CPU_KERNEL_H_
#include <thread>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
/**
 * CPU kernel for the SGD operator, templated on the element type T of its
 * input and output buffers.
 *
 * The hyper-parameters (dampening, weight decay, nesterov) are stored as
 * members and consumed by Launch.
 */
template <typename T>
class SGDCPUKernel : public CPUKernel {
 public:
  SGDCPUKernel() = default;
  ~SGDCPUKernel() override = default;

  /// Initializes the kernel from the given node.
  void InitKernel(const CNodePtr &kernel_node) override;

  /// Runs the kernel on the given buffers; the workspace argument is unused.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
              const std::vector<AddressPtr> &outputs) override;

 private:
  /// Validates the input and output buffer lists passed to Launch.
  static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

  // Every member has a default so a kernel whose InitKernel has not run yet
  // never exposes indeterminate values.
  float dampening_{0.0f};
  float weight_decay_{0.0f};
  bool nesterov_{true};
};
// Register SGDCPUKernel<float> for the SGD op: six float32 inputs and one
// float32 output.
// NOTE(review): input order presumably matches Launch (params, grad, lr,
// accum, momentum, stat) - confirm against the op definition.
MS_REG_CPU_KERNEL_T(SGD,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
SGDCPUKernel, float);
// Register SGDCPUKernel<float16> for the SGD op: six float16 inputs and one
// float16 output.
// NOTE(review): input order presumably matches Launch (params, grad, lr,
// accum, momentum, stat) - confirm against the op definition.
MS_REG_CPU_KERNEL_T(SGD,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
SGDCPUKernel, float16);
} // namespace kernel
} // namespace mindspore
#endif

View File

@ -2704,7 +2704,7 @@ class SGD(PrimitiveWithCheck):
float16 nor float32. float16 nor float32.
Supported Platforms: Supported Platforms:
``Ascend`` ``GPU`` ``Ascend`` ``GPU`` ``CPU``
Examples: Examples:
>>> sgd = ops.SGD() >>> sgd = ops.SGD()

View File

@ -0,0 +1,72 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.nn import Dense
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore.nn.optim import SGD
from mindspore.ops import operations as P
# Run every test in this module in graph mode with CPU as the device target.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
class NetSGD(nn.Cell):
    """Network with a single Dense(16, 10) layer fed by a flattened input."""

    def __init__(self):
        super().__init__()
        self.batch_size = 1
        self.reshape = P.Reshape()
        # Constant initial weights (all 0.01) for the dense layer.
        initial_weight = Tensor(np.ones([10, 16]).astype(np.float32) * 0.01)
        self.fc1 = Dense(16, 10, weight_init=initial_weight)

    def construct(self, input_x):
        """Reshape the input to (batch_size, -1) and apply the dense layer."""
        flattened = self.reshape(input_x, (self.batch_size, -1))
        return self.fc1(flattened)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_SGD():
    """Train NetSGD for three steps with SGD and check the loss strictly decreases."""
    epoch = 3
    net = NetSGD()

    # Optimizer hyper-parameters, passed positionally to SGD below.
    learning_rate = 0.1
    momentum = 0.9
    dampening = 0.0
    weight_decay = 0.0
    nesterov = True
    loss_scale = 1.0

    trainable_params = (param for param in net.get_parameters() if param.requires_grad)
    optimizer = SGD(trainable_params, learning_rate, momentum, dampening,
                    weight_decay, nesterov, loss_scale)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    train_network = TrainOneStepCell(WithLossCell(net, criterion), optimizer)
    train_network.set_train()

    losses = []
    for _ in range(epoch):
        data = Tensor(np.arange(0, 16).reshape(1, 1, 4, 4).astype(np.float32) * 0.01)
        label = Tensor(np.array([0]).astype(np.int32))
        losses.append(train_network(data, label).asnumpy())

    # Each loss must be strictly below the previous one; 100.0 bounds the first.
    previous_loss = 100.0
    for current_loss in losses:
        assert previous_loss > current_loss
        previous_loss = current_loss