mindspore/tests/st/auto_monad/test_effect_optimizer.py

839 lines
31 KiB
Python

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import pytest
import numpy as np
import mindspore.nn as nn
from mindspore import context, Tensor
from mindspore.ops import operations as P
from mindspore.common import dtype as mstype
from mindspore.common.parameter import Parameter
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
class AdamNet(nn.Cell):
def __init__(self, var, m, v):
super(AdamNet, self).__init__()
self.apply_adam = P.Adam()
self.var = Parameter(var, name="var")
self.m = Parameter(m, name="m")
self.v = Parameter(v, name="v")
def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
self.apply_adam(self.var, self.m, self.v, beta1_power,
beta2_power, lr, beta1, beta2, epsilon, grad)
return self.var, self.m, self.v
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adam():
var = Tensor(np.ones([3, 3, 3]).astype(np.float32))
m = Tensor(np.ones([3, 3, 3]).astype(np.float32))
v = Tensor(np.ones([3, 3, 3]).astype(np.float32))
net = AdamNet(var, m, v)
beta1_power = Tensor(0.9, mstype.float32)
beta2_power = Tensor(0.999, mstype.float32)
lr = Tensor(0.001, mstype.float32)
beta1 = Tensor(0.9, mstype.float32)
beta2 = Tensor(0.999, mstype.float32)
epsilon = Tensor(1e-8, mstype.float32)
grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32))
new_var, new_m, new_v = net(
beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
"The results should be different!"
class ApplyAdaMaxNet(nn.Cell):
def __init__(self, val, m, v):
super(ApplyAdaMaxNet, self).__init__()
self.apply_ada_max = P.ApplyAdaMax()
self.var = Parameter(val, name="var")
self.m = Parameter(m, name="m")
self.v = Parameter(v, name="v")
def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):
self.apply_ada_max(self.var, self.m, self.v,
beta1_power, lr, beta1, beta2, epsilon, grad)
return self.var, self.m, self.v
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_ada_max():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
m = Tensor(np.random.rand(3, 3).astype(np.float32))
v = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyAdaMaxNet(var, m, v)
beta1_power = Tensor(0.9, mstype.float32)
lr = Tensor(0.001, mstype.float32)
beta1 = Tensor(0.9, mstype.float32)
beta2 = Tensor(0.99, mstype.float32)
epsilon = Tensor(1e-10, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var, new_m, new_v = net(beta1_power, lr, beta1, beta2, epsilon, grad)
assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
"The results should be different!"
class ApplyAdadeltaNet(nn.Cell):
def __init__(self, var, accum, accum_update):
super(ApplyAdadeltaNet, self).__init__()
self.apply_adadelta = P.ApplyAdadelta()
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
self.accum_update = Parameter(accum_update, name="accum_update")
def construct(self, lr, rho, epsilon, grad):
self.apply_adadelta(self.var, self.accum,
self.accum_update, lr, rho, epsilon, grad)
return self.var, self.accum, self.accum_update
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adadelta():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
accum_update = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyAdadeltaNet(var, accum, accum_update)
lr = Tensor(0.001, mstype.float32)
rho = Tensor(0.0, mstype.float32)
epsilon = Tensor(1e-6, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var, new_accum, new_accum_update = net(lr, rho, epsilon, grad)
assert ((new_var != var).any() and (new_accum != accum).any() and (new_accum_update != accum_update).any()), \
"The results should be different!"
class ApplyAdagrad(nn.Cell):
def __init__(self, var, accum):
super(ApplyAdagrad, self).__init__()
self.apply_adagrad = P.ApplyAdagrad()
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
def construct(self, lr, grad):
self.apply_adagrad(self.var, self.accum, lr, grad)
return self.var, self.accum
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adagrad():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyAdagrad(var, accum)
lr = Tensor(0.001, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var, new_accum = net(lr, grad)
assert ((new_var != var).any() and (new_accum != accum).any()), \
"The results should be different!"
class ApplyAdagradV2Net(nn.Cell):
def __init__(self, var, accum):
super(ApplyAdagradV2Net, self).__init__()
self.apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6)
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
def construct(self, lr, grad):
self.apply_adagrad_v2(self.var, self.accum, lr, grad)
return self.var, self.accum
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_adagrad_v2():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyAdagradV2Net(var, accum)
lr = Tensor(0.001, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var, new_accum = net(lr, grad)
assert ((new_var != var).any() and (new_accum != accum).any()), \
"The results should be different!"
class ApplyAddSignNet(nn.Cell):
def __init__(self, var, m):
super(ApplyAddSignNet, self).__init__()
self.apply_add_sign = P.ApplyAddSign()
self.var = Parameter(var, name="var")
self.m = Parameter(m, name="m")
def construct(self, lr, alpha, sign_decay, beta, grad):
self.apply_add_sign(self.var, self.m, lr, alpha,
sign_decay, beta, grad)
return self.var, self.m
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_add_sign():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
m = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyAddSignNet(var, m)
lr = Tensor(0.001, mstype.float32)
alpha = Tensor(1.0, mstype.float32)
sign_decay = Tensor(0.99, mstype.float32)
beta = Tensor(0.9, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var, new_m = net(lr, alpha, sign_decay, beta, grad)
assert ((new_var != var).any() and (new_m != m).any()), \
"The results should be different!"
class ApplyCenteredRMSPropNet(nn.Cell):
def __init__(self, var):
super(ApplyCenteredRMSPropNet, self).__init__()
self.apply_centered_rms_prop = P.ApplyCenteredRMSProp()
self.var = Parameter(var, name="var")
def construct(self, mean_grad, mean_square, moment, grad, learning_rate):
self.apply_centered_rms_prop(self.var, mean_grad, mean_square, moment, grad,
learning_rate, 0.0, 1e-10, 0.05)
return self.var
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_centered_rms_prop():
var = Tensor(
np.arange(-6, 6).astype(np.float32).reshape(2, 3, 2), mstype.float32)
net = ApplyCenteredRMSPropNet(var)
mean_grad = Tensor(np.arange(12).astype(
np.float32).reshape(2, 3, 2), mstype.float32)
mean_square = Tensor(
np.arange(-8, 4).astype(np.float32).reshape(2, 3, 2), mstype.float32)
moment = Tensor(np.arange(12).astype(
np.float32).reshape(2, 3, 2), mstype.float32)
grad = Tensor(np.arange(12).astype(
np.float32).reshape(2, 3, 2), mstype.float32)
learning_rate = Tensor(0.9, mstype.float32)
new_var = net(mean_grad, mean_square, moment, grad, learning_rate)
assert (new_var != var).any(), "The results should be different!"
class ApplyFtrlNet(nn.Cell):
def __init__(self, var, accum, linear):
super(ApplyFtrlNet, self).__init__()
self.apply_ftrl = P.ApplyFtrl()
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
self.linear = Parameter(linear, name="linear")
def construct(self, grad, lr, l1, l2, lr_power):
self.apply_ftrl(self.var, self.accum, self.linear,
grad, lr, l1, l2, lr_power)
return self.var, self.accum, self.linear
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_ftrl():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
linear = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyFtrlNet(var, accum, linear)
grad = Tensor(np.random.randint(-4, 4, (3, 3)), mstype.float32)
lr = Tensor(0.001, mstype.float32)
l1 = Tensor(0.0, mstype.float32)
l2 = Tensor(0.0, mstype.float32)
lr_power = Tensor(-0.5, mstype.float32)
new_var, new_accum, new_linear = net(grad, lr, l1, l2, lr_power)
assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
"The results should be different!"
class ApplyGradientDescentNet(nn.Cell):
def __init__(self, var):
super(ApplyGradientDescentNet, self).__init__()
self.apply_gradient_descent = P.ApplyGradientDescent()
self.var = Parameter(var, name="var")
def construct(self, alpha, delta):
self.apply_gradient_descent(self.var, alpha, delta)
return self.var
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_gradient_descent():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyGradientDescentNet(var)
alpha = Tensor(0.001, mstype.float32)
delta = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var = net(alpha, delta)
assert (new_var != var).any(), "The results should be different!"
class ApplyMomentumNet(nn.Cell):
def __init__(self, var, accum):
super(ApplyMomentumNet, self).__init__()
self.apply_momentum = P.ApplyMomentum(gradient_scale=1024.0)
self.var = Parameter(var, name='var')
self.accum = Parameter(accum, name='accum')
def construct(self, lr, grad, momentum):
self.apply_momentum(self.var, self.accum, lr, grad, momentum)
return self.var, self.accum
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_momentum():
var = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
accum = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
net = ApplyMomentumNet(var, accum)
lr = Tensor(np.random.normal(size=(1,)).astype(np.float32))
grad = Tensor(np.random.normal(size=(2, 3, 3, 4)).astype(np.float32))
momentum = Tensor(np.random.normal(size=(1,)).astype(np.float32))
new_var, new_accum = net(lr, grad, momentum)
assert ((new_var != var).any() and (new_accum != accum).any()), \
"The results should be different!"
class ApplyPowerSignNet(nn.Cell):
def __init__(self, var, m):
super(ApplyPowerSignNet, self).__init__()
self.apply_power_sign = P.ApplyPowerSign()
self.var = Parameter(var, name="var")
self.m = Parameter(m, name="m")
def construct(self, lr, logbase, sign_decay, beta, grad):
self.apply_power_sign(self.var, self.m, lr,
logbase, sign_decay, beta, grad)
return self.var, self.m
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_power_sign():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
m = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyPowerSignNet(var, m)
lr = Tensor(0.001, mstype.float32)
logbase = Tensor(np.e, mstype.float32)
sign_decay = Tensor(0.99, mstype.float32)
beta = Tensor(0.9, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var, new_m = net(lr, logbase, sign_decay, beta, grad)
assert ((new_var != var).any() and (new_m != m).any()), \
"The results should be different!"
class ApplyProximalAdagradNet(nn.Cell):
def __init__(self, var, accum):
super(ApplyProximalAdagradNet, self).__init__()
self.apply_proximal_adagrad = P.ApplyProximalAdagrad()
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name='accum')
def construct(self, lr, l1, l2, grad):
self.apply_proximal_adagrad(self.var, self.accum, lr, l1, l2, grad)
return self.var, self.accum
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_proximal_adagrad():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyProximalAdagradNet(var, accum)
lr = Tensor(0.01, mstype.float32)
l1 = Tensor(0.0, mstype.float32)
l2 = Tensor(0.0, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var, new_accum = net(lr, l1, l2, grad)
assert ((new_var != var).any() and (new_accum != accum).any()), \
"The results should be different!"
class ApplyProximalGradientDescentNet(nn.Cell):
def __init__(self, var):
super(ApplyProximalGradientDescentNet, self).__init__()
self.apply_proximal_gradient_descent = P.ApplyProximalGradientDescent()
self.var = Parameter(var, name="var")
def construct(self, alpha, l1, l2, delta):
self.apply_proximal_gradient_descent(self.var, alpha, l1, l2, delta)
return self.var
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_proximal_gradient_descent():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyProximalGradientDescentNet(var)
alpha = Tensor(0.001, mstype.float32)
l1 = Tensor(0.0, mstype.float32)
l2 = Tensor(0.0, mstype.float32)
delta = Tensor(np.random.rand(3, 3).astype(np.float32))
new_var = net(alpha, l1, l2, delta)
assert (new_var != var).any(), "The results should be different!"
class ApplyRMSPropNet(nn.Cell):
def __init__(self, var):
super(ApplyRMSPropNet, self).__init__()
self.apply_rms_prop = P.ApplyRMSProp()
self.var = Parameter(var, name="var")
def construct(self, mean_square, moment, learning_rate, grad):
self.apply_rms_prop(self.var, mean_square, moment,
learning_rate, grad, 0.0, 1e-10, 0.001)
return self.var
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_apply_rms_prop():
var = Tensor(1., mstype.float32)
net = ApplyRMSPropNet(var)
mean_square = Tensor(2., mstype.float32)
moment = Tensor(1., mstype.float32)
learning_rate = Tensor(0.9, mstype.float32)
grad = Tensor(2., mstype.float32)
new_var = net(mean_square, moment, learning_rate, grad)
assert (new_var != var).any(), "The results should be different!"
class FusedSparseAdamNet(nn.Cell):
def __init__(self, var, m, v):
super(FusedSparseAdamNet, self).__init__()
self.fused_sparse_adam = P.FusedSparseAdam()
self.var = Parameter(var, name="var")
self.m = Parameter(m, name="m")
self.v = Parameter(v, name="v")
def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
self.fused_sparse_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2,
epsilon, grad, indices)
return self.var, self.m, self.v
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_adam():
var = Tensor(np.ones([3, 1, 2]).astype(np.float32))
m = Tensor(np.ones([3, 1, 2]).astype(np.float32))
v = Tensor(np.ones([3, 1, 2]).astype(np.float32))
net = FusedSparseAdamNet(var, m, v)
beta1_power = Tensor(0.9, mstype.float32)
beta2_power = Tensor(0.999, mstype.float32)
lr = Tensor(0.001, mstype.float32)
beta1 = Tensor(0.9, mstype.float32)
beta2 = Tensor(0.999, mstype.float32)
epsilon = Tensor(1e-8, mstype.float32)
gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32)
indices = Tensor([0, 1], mstype.int32)
new_var, new_m, new_v = net(
beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
"The results should be different!"
class FusedSparseFtrlNet(nn.Cell):
def __init__(self, var, accum, linear):
super(FusedSparseFtrlNet, self).__init__()
self.fused_sparse_ftrl = P.FusedSparseFtrl(
lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
self.linear = Parameter(linear, name="linear")
def construct(self, grad, indices):
self.fused_sparse_ftrl(self.var, self.accum,
self.linear, grad, indices)
return self.var, self.accum, self.linear
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_ftrl():
var = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
accum = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
linear = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
net = FusedSparseFtrlNet(var, accum, linear)
grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
indices = Tensor(np.array([0, 1]).astype(np.int32))
new_var, new_accum, new_linear = net(grad, indices)
assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
"The results should be different!"
class FusedSparseLazyAdamNet(nn.Cell):
def __init__(self, var, m, v):
super(FusedSparseLazyAdamNet, self).__init__()
self.fused_sparse_lazyadam = P.FusedSparseLazyAdam()
self.var = Parameter(var, name="var")
self.m = Parameter(m, name="m")
self.v = Parameter(v, name="v")
def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, indices):
self.fused_sparse_lazyadam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1,
beta2, epsilon, grad, indices)
return self.var, self.m, self.v
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_lazyadam():
var = Tensor(np.ones([3, 1, 2]).astype(np.float32))
m = Tensor(np.ones([3, 1, 2]).astype(np.float32))
v = Tensor(np.ones([3, 1, 2]).astype(np.float32))
net = FusedSparseLazyAdamNet(var, m, v)
beta1_power = Tensor(0.9, mstype.float32)
beta2_power = Tensor(0.999, mstype.float32)
lr = Tensor(0.001, mstype.float32)
beta1 = Tensor(0.9, mstype.float32)
beta2 = Tensor(0.999, mstype.float32)
epsilon = Tensor(1e-8, mstype.float32)
gradient = Tensor(np.random.rand(2, 1, 2), mstype.float32)
indices = Tensor([0, 1], mstype.int32)
new_var, new_m, new_v = net(
beta1_power, beta2_power, lr, beta1, beta2, epsilon, gradient, indices)
assert ((new_var != var).any() and (new_m != m).any() and (new_v != v).any()), \
"The results should be different!"
class FusedSparseProximalAdagradNet(nn.Cell):
def __init__(self, var, accum):
super(FusedSparseProximalAdagradNet, self).__init__()
self.fused_sparse_proximal_adagrad = P.FusedSparseProximalAdagrad()
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
def construct(self, lr, l1, l2, grad, indices):
self.fused_sparse_proximal_adagrad(
self.var, self.accum, lr, l1, l2, grad, indices)
return self.var, self.accum
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_fused_sparse_proximal_adagrad():
var = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
accum = Tensor(np.random.rand(3, 1, 2).astype(np.float32))
net = FusedSparseProximalAdagradNet(var, accum)
lr = Tensor(0.01, mstype.float32)
l1 = Tensor(0.0, mstype.float32)
l2 = Tensor(0.0, mstype.float32)
grad = Tensor(np.random.rand(2, 1, 2).astype(np.float32))
indices = Tensor(np.array([0, 1]).astype(np.int32))
new_var, new_accum = net(lr, l1, l2, grad, indices)
assert ((new_var != var).any() and (new_accum != accum).any()), \
"The results should be different!"
class SparseApplyAdagradNet(nn.Cell):
def __init__(self, var, accum):
super(SparseApplyAdagradNet, self).__init__()
self.sparse_apply_adagrad = P.SparseApplyAdagrad(lr=0.01)
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
def construct(self, grad, indices):
self.sparse_apply_adagrad(self.var, self.accum, grad, indices)
return self.var, self.accum
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_adagrad():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
net = SparseApplyAdagradNet(var, accum)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
indices = Tensor(np.ones((3,), np.int32))
new_var, _ = net(grad, indices)
# new_accum is equal to accum.
assert (new_var != var).any(), "The results should be different!"
class SparseApplyAdagradV2Net(nn.Cell):
def __init__(self, var, accum):
super(SparseApplyAdagradV2Net, self).__init__()
self.sparse_apply_adagrad_v2 = P.SparseApplyAdagradV2(
lr=0.01, epsilon=0.001)
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
def construct(self, grad, indices):
self.sparse_apply_adagrad_v2(self.var, self.accum, grad, indices)
return self.var, self.accum
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_adagrad_v2():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
net = SparseApplyAdagradV2Net(var, accum)
grad = grad = Tensor(np.random.rand(3, 3).astype(np.float32))
indices = Tensor(np.ones((3,), np.int32))
new_var, new_accum = net(grad, indices)
assert ((new_var != var).any() and (new_accum != accum).any()), \
"The results should be different!"
class SparseApplyFtrlNet(nn.Cell):
def __init__(self, var, accum, linear):
super(SparseApplyFtrlNet, self).__init__()
self.sparse_apply_ftrl = P.SparseApplyFtrl(
lr=0.01, l1=0.0, l2=0.0, lr_power=-0.5)
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
self.linear = Parameter(linear, name="linear")
def construct(self, grad, indices):
self.sparse_apply_ftrl(self.var, self.accum,
self.linear, grad, indices)
return self.var, self.accum, self.linear
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_ftrl():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
linear = Tensor(np.random.rand(3, 3).astype(np.float32))
net = SparseApplyFtrlNet(var, accum, linear)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
indices = Tensor(np.ones((3,), np.int32))
new_var, new_accum, new_linear = net(grad, indices)
assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
"The results should be different!"
class SparseApplyFtrlV2Net(nn.Cell):
def __init__(self, var, accum, linear):
super(SparseApplyFtrlV2Net, self).__init__()
self.sparse_apply_ftrl_v2 = P.SparseApplyFtrlV2(
lr=0.01, l1=0.0, l2=0.0, l2_shrinkage=0.0, lr_power=-0.5)
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
self.linear = Parameter(linear, name="linear")
def construct(self, grad, indices):
self.sparse_apply_ftrl_v2(
self.var, self.accum, self.linear, grad, indices)
return self.var, self.accum, self.linear
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_ftrl_v2():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
linear = Tensor(np.random.rand(3, 3).astype(np.float32))
net = SparseApplyFtrlV2Net(var, accum, linear)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
indices = Tensor(np.ones((3,), np.int32))
new_var, new_accum, new_linear = net(grad, indices)
assert ((new_var != var).any() and (new_accum != accum).any() and (new_linear != linear).any()), \
"The results should be different!"
class SparseApplyProximalAdagradNet(nn.Cell):
def __init__(self, var, accum):
super(SparseApplyProximalAdagradNet, self).__init__()
self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
def construct(self, lr, l1, l2, grad, indices):
self.sparse_apply_proximal_adagrad(
self.var, self.accum, lr, l1, l2, grad, indices)
return self.var, self.accum
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_proximal_adagrad():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
net = SparseApplyProximalAdagradNet(var, accum)
lr = Tensor(0.01, mstype.float32)
l1 = Tensor(0.0, mstype.float32)
l2 = Tensor(0.0, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
indices = Tensor(np.ones((3,), np.int32))
new_var, new_accum = net(lr, l1, l2, grad, indices)
assert ((new_var != var).any() and (new_accum != accum).any()), \
"The results should be different!"
class SGDNet(nn.Cell):
def __init__(self, var):
super(SGDNet, self).__init__()
self.sgd = P.SGD()
self.var = Parameter(var, name="var")
def construct(self, gradient, learning_rate, accum, momentum, stat):
self.sgd(self.var, gradient, learning_rate, accum, momentum, stat)
return self.var
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sgd():
var = Tensor(np.array([2, -0.5, 1.7, 4]), mstype.float32)
net = SGDNet(var)
gradient = Tensor(np.array([1, -1, 0.5, 2]), mstype.float32)
learning_rate = Tensor(0.01, mstype.float32)
accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mstype.float32)
momentum = Tensor(0.1, mstype.float32)
stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mstype.float32)
new_var = net(gradient, learning_rate, accum, momentum, stat)
assert (new_var != var).any(), "The results should be different!"
class ApplyProximalAdagradConstantNet(nn.Cell):
def __init__(self, var, accum):
super().__init__()
self.depend = P.Depend()
self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
self.var = Parameter(var, name="var")
self.accum = Parameter(accum, name="accum")
self.const = Tensor(9999, mstype.float32)
def construct(self, lr, l1, l2, grad, indices):
optimizer = self.sparse_apply_proximal_adagrad(
self.var, self.accum, lr, l1, l2, grad, indices)
return self.depend(self.const, optimizer)
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sparse_apply_proximal_adagrad_constant():
var = Tensor(np.random.rand(3, 3).astype(np.float32))
accum = Tensor(np.random.rand(3, 3).astype(np.float32))
net = ApplyProximalAdagradConstantNet(var, accum)
lr = Tensor(0.01, mstype.float32)
l1 = Tensor(0.1, mstype.float32)
l2 = Tensor(0.2, mstype.float32)
grad = Tensor(np.random.rand(3, 3).astype(np.float32))
indices = Tensor(np.ones((3,), np.int32))
net(lr, l1, l2, grad, indices)
assert (net.parameters_dict()['var'].data != var).any()
assert (net.parameters_dict()['accum'].data != accum).any()
class MulSGDNet(nn.Cell):
def __init__(self, var):
super().__init__()
self.sgd = P.SGD()
self.var = Parameter(var, name="var")
self.mul = P.Mul()
def construct(self, gradient, learning_rate, accum, momentum, stat):
out = self.mul(self.var, self.var)
self.sgd(self.var, gradient, learning_rate, accum, momentum, stat)
return out
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_mul_sgd():
var = Tensor(np.array([2, -0.5, 1.7, 4]), mstype.float32)
net = MulSGDNet(var)
gradient = Tensor(np.array([1, -1, 0.5, 2]), mstype.float32)
learning_rate = Tensor(0.01, mstype.float32)
accum = Tensor(np.array([0.1, 0.3, -0.2, -0.1]), mstype.float32)
momentum = Tensor(0.1, mstype.float32)
stat = Tensor(np.array([1.5, -0.3, 0.2, -0.7]), mstype.float32)
net(gradient, learning_rate, accum, momentum, stat)
assert (net.parameters_dict()['var'].data != var).any()