forked from mindspore-Ecosystem/mindspore
!6232 fix gpu heterogeneous bug
Merge pull request !6232 from baihuawei/embedding
This commit is contained in:
commit
af5ebcf1a9
|
@ -196,7 +196,9 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (need_sync) {
|
if (need_sync) {
|
||||||
tensor->set_device_address(device_address);
|
if (AnfAlgo::IsParameterWeight(input_node->cast<ParameterPtr>())) {
|
||||||
|
tensor->set_device_address(device_address);
|
||||||
|
}
|
||||||
MS_EXCEPTION_IF_NULL(device_address);
|
MS_EXCEPTION_IF_NULL(device_address);
|
||||||
if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0),
|
if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0),
|
||||||
LongToSize(tensor->data().nbytes()), tensor->data_type(),
|
LongToSize(tensor->data().nbytes()), tensor->data_type(),
|
||||||
|
|
|
@ -19,6 +19,8 @@ import pytest
|
||||||
import mindspore.context as context
|
import mindspore.context as context
|
||||||
import mindspore.nn as nn
|
import mindspore.nn as nn
|
||||||
from mindspore import Tensor
|
from mindspore import Tensor
|
||||||
|
from mindspore.common.parameter import Parameter
|
||||||
|
from mindspore.common.initializer import initializer
|
||||||
from mindspore.common.api import ms_function
|
from mindspore.common.api import ms_function
|
||||||
from mindspore.ops.operations import _quant_ops as Q
|
from mindspore.ops.operations import _quant_ops as Q
|
||||||
|
|
||||||
|
@ -26,13 +28,15 @@ context.set_context(device_target='GPU')
|
||||||
|
|
||||||
|
|
||||||
class Net(nn.Cell):
|
class Net(nn.Cell):
|
||||||
def __init__(self):
|
def __init__(self, mean, variance):
|
||||||
super(Net, self).__init__()
|
super(Net, self).__init__()
|
||||||
|
self.mean = mean
|
||||||
|
self.variance = variance
|
||||||
self.op = Q.BatchNormFold(momentum=0.9, freeze_bn=10)
|
self.op = Q.BatchNormFold(momentum=0.9, freeze_bn=10)
|
||||||
|
|
||||||
@ms_function
|
@ms_function
|
||||||
def construct(self, x, mean, variance, current_step):
|
def construct(self, x, current_step):
|
||||||
a, b, c, d = self.op(x, mean, variance, current_step)
|
a, b, c, d = self.op(x, self.mean, self.variance, current_step)
|
||||||
return a, b, c, d
|
return a, b, c, d
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,16 +56,17 @@ def np_result(x, mean, var, momentum, epsilon):
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
def test_batchnorm_fold():
|
def test_batchnorm_fold():
|
||||||
net = Net()
|
|
||||||
c = 64
|
c = 64
|
||||||
x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
|
x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
|
||||||
mean = np.random.uniform(1, 10, size=[c]).astype('float32')
|
mean = np.random.uniform(1, 10, size=[c]).astype('float32')
|
||||||
variance = np.random.uniform(1, 10, size=[c]).astype('float32')
|
variance = np.random.uniform(1, 10, size=[c]).astype('float32')
|
||||||
current_step = np.array([0]).astype('int32')
|
current_step = np.array([0]).astype('int32')
|
||||||
ms_mean = Tensor(mean)
|
ms_mean_t = Tensor(mean)
|
||||||
ms_var = Tensor(variance)
|
ms_var_t = Tensor(variance)
|
||||||
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var,
|
ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
|
||||||
Tensor(current_step))
|
ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
|
||||||
|
net = Net(ms_mean, ms_var)
|
||||||
|
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
|
||||||
|
|
||||||
expect1, expect2, expect3, expect4, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
|
expect1, expect2, expect3, expect4, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
|
||||||
assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
|
assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
|
||||||
|
@ -76,16 +81,17 @@ def test_batchnorm_fold():
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
def test_batchnorm_fold2():
|
def test_batchnorm_fold2():
|
||||||
net = Net()
|
|
||||||
c = 64
|
c = 64
|
||||||
x = np.random.uniform(1, 10, size=[3, c, 512, 512]).astype('float32')
|
x = np.random.uniform(1, 10, size=[3, c, 512, 512]).astype('float32')
|
||||||
mean = np.random.uniform(1, 10, size=[c]).astype('float32')
|
mean = np.random.uniform(1, 10, size=[c]).astype('float32')
|
||||||
variance = np.random.uniform(1, 10, size=[c]).astype('float32')
|
variance = np.random.uniform(1, 10, size=[c]).astype('float32')
|
||||||
current_step = np.array([0]).astype('int32')
|
current_step = np.array([0]).astype('int32')
|
||||||
ms_mean = Tensor(mean)
|
ms_mean_t = Tensor(mean)
|
||||||
ms_var = Tensor(variance)
|
ms_var_t = Tensor(variance)
|
||||||
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var,
|
ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
|
||||||
Tensor(current_step))
|
ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
|
||||||
|
net = Net(ms_mean, ms_var)
|
||||||
|
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
|
||||||
expect1, expect2, expect3, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
|
expect1, expect2, expect3, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
|
||||||
assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
|
assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
|
||||||
assert np.allclose(batch_var.asnumpy(), expect2, rtol=1.e-7, atol=1.e-5)
|
assert np.allclose(batch_var.asnumpy(), expect2, rtol=1.e-7, atol=1.e-5)
|
||||||
|
@ -98,16 +104,17 @@ def test_batchnorm_fold2():
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
def test_batchnorm_fold_freeze():
|
def test_batchnorm_fold_freeze():
|
||||||
net = Net()
|
|
||||||
c = 64
|
c = 64
|
||||||
x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
|
x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
|
||||||
mean = np.random.uniform(1, 10, size=[c]).astype('float32')
|
mean = np.random.uniform(1, 10, size=[c]).astype('float32')
|
||||||
variance = np.random.uniform(1, 10, size=[c]).astype('float32')
|
variance = np.random.uniform(1, 10, size=[c]).astype('float32')
|
||||||
current_step = np.array([10]).astype('int32')
|
current_step = np.array([10]).astype('int32')
|
||||||
ms_mean = Tensor(mean)
|
ms_mean_t = Tensor(mean)
|
||||||
ms_var = Tensor(variance)
|
ms_var_t = Tensor(variance)
|
||||||
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var,
|
ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
|
||||||
Tensor(current_step))
|
ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
|
||||||
|
net = Net(ms_mean, ms_var)
|
||||||
|
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
|
||||||
_, _, _, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
|
_, _, _, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
|
||||||
assert np.allclose(batch_mean.asnumpy(), np.zeros_like(mean), rtol=1.e-7, atol=1.e-5)
|
assert np.allclose(batch_mean.asnumpy(), np.zeros_like(mean), rtol=1.e-7, atol=1.e-5)
|
||||||
assert np.allclose(batch_var.asnumpy(), np.ones_like(mean), rtol=1.e-7, atol=1.e-5)
|
assert np.allclose(batch_var.asnumpy(), np.ones_like(mean), rtol=1.e-7, atol=1.e-5)
|
||||||
|
|
|
@ -19,35 +19,48 @@ import pytest
|
||||||
import mindspore.context as context
|
import mindspore.context as context
|
||||||
import mindspore.nn as nn
|
import mindspore.nn as nn
|
||||||
from mindspore import Tensor
|
from mindspore import Tensor
|
||||||
|
from mindspore.common.parameter import Parameter
|
||||||
|
from mindspore.common.initializer import initializer
|
||||||
from mindspore.ops import operations as P
|
from mindspore.ops import operations as P
|
||||||
|
|
||||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||||
|
|
||||||
|
|
||||||
class NetCenteredRMSProp(nn.Cell):
|
class NetCenteredRMSProp(nn.Cell):
|
||||||
def __init__(self, lr, decay, momentum, epsilon):
|
def __init__(self, lr, decay, momentum, epsilon, var, g, mg, rms, mom):
|
||||||
super(NetCenteredRMSProp, self).__init__()
|
super(NetCenteredRMSProp, self).__init__()
|
||||||
self.rms_opt = P.ApplyCenteredRMSProp()
|
self.rms_opt = P.ApplyCenteredRMSProp()
|
||||||
self.lr = lr
|
self.lr = lr
|
||||||
self.decay = decay
|
self.decay = decay
|
||||||
self.momentum = momentum
|
self.momentum = momentum
|
||||||
self.epsilon = epsilon
|
self.epsilon = epsilon
|
||||||
|
self.var = var
|
||||||
|
self.g = g
|
||||||
|
self.mg = mg
|
||||||
|
self.rms = rms
|
||||||
|
self.mom = mom
|
||||||
|
|
||||||
def construct(self, var, g, mg, rms, mom):
|
def construct(self):
|
||||||
return self.rms_opt(var, mg, rms, mom, g, self.lr, self.decay, self.momentum, self.epsilon)
|
return self.rms_opt(self.var, self.mg, self.rms, self.mom, self.g, self.lr, self.decay, self.momentum,
|
||||||
|
self.epsilon)
|
||||||
|
|
||||||
|
|
||||||
class NetRMSProp(nn.Cell):
|
class NetRMSProp(nn.Cell):
|
||||||
def __init__(self, lr, decay, momentum, epsilon):
|
def __init__(self, lr, decay, momentum, epsilon, var, g, mg, rms, mom):
|
||||||
super(NetRMSProp, self).__init__()
|
super(NetRMSProp, self).__init__()
|
||||||
self.lr = lr
|
self.lr = lr
|
||||||
self.decay = decay
|
self.decay = decay
|
||||||
self.momentum = momentum
|
self.momentum = momentum
|
||||||
self.epsilon = epsilon
|
self.epsilon = epsilon
|
||||||
|
self.var = var
|
||||||
|
self.g = g
|
||||||
|
self.mg = mg
|
||||||
|
self.rms = rms
|
||||||
|
self.mom = mom
|
||||||
self.rms_opt = P.ApplyRMSProp()
|
self.rms_opt = P.ApplyRMSProp()
|
||||||
|
|
||||||
def construct(self, var, g, mg, rms, mom):
|
def construct(self):
|
||||||
return self.rms_opt(var, rms, mom, self.lr, g, self.decay, self.momentum, self.epsilon)
|
return self.rms_opt(self.var, self.rms, self.mom, self.lr, self.g, self.decay, self.momentum, self.epsilon)
|
||||||
|
|
||||||
|
|
||||||
def rmsprop_numpy(variable, gradients, mean_square, moment,
|
def rmsprop_numpy(variable, gradients, mean_square, moment,
|
||||||
|
@ -67,6 +80,7 @@ def rmspropcented_numpy(variable, gradients, mean_gradients, mean_square, moment
|
||||||
variable = variable - moment
|
variable = variable - moment
|
||||||
return variable, gradients, mean_gradients, mean_square, moment
|
return variable, gradients, mean_gradients, mean_square, moment
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level0
|
@pytest.mark.level0
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
|
@ -79,25 +93,33 @@ def test_rmsprop():
|
||||||
mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
|
mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
|
||||||
moment_np = np.array([0.0, 0.0], dtype=np.float32)
|
moment_np = np.array([0.0, 0.0], dtype=np.float32)
|
||||||
|
|
||||||
variable_ms = Tensor(variable_np)
|
variable = Tensor(variable_np)
|
||||||
gradients_ms = Tensor(gradients_np)
|
gradients = Tensor(gradients_np)
|
||||||
mean_gradients_ms = Tensor(mean_gradients_np)
|
mean_gradients = Tensor(mean_gradients_np)
|
||||||
mean_square_ms = Tensor(mean_square_np)
|
mean_square = Tensor(mean_square_np)
|
||||||
moment_ms = Tensor(moment_np)
|
moment = Tensor(moment_np)
|
||||||
|
|
||||||
|
variable_ms = Parameter(initializer(variable, variable.shape), name='var')
|
||||||
|
gradients_ms = Parameter(initializer(gradients, gradients.shape), name='grad')
|
||||||
|
mean_gradients_ms = Parameter(initializer(mean_gradients, mean_gradients.shape), name='mg')
|
||||||
|
mean_square_ms = Parameter(initializer(mean_square, mean_square.shape), name='msr')
|
||||||
|
moment_ms = Parameter(initializer(moment, moment.shape), name='mom')
|
||||||
|
|
||||||
if centered:
|
if centered:
|
||||||
variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
|
variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
|
||||||
rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
|
rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
|
||||||
learning_rate, decay, momentum, epsilon)
|
learning_rate, decay, momentum, epsilon)
|
||||||
net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon)
|
net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
|
||||||
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms)
|
mean_square_ms, moment_ms)
|
||||||
|
_ = net()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
variable_np, gradients_np, mean_square_np, moment_np = \
|
variable_np, gradients_np, mean_square_np, moment_np = \
|
||||||
rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
|
rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
|
||||||
learning_rate, decay, momentum, epsilon)
|
learning_rate, decay, momentum, epsilon)
|
||||||
net = NetRMSProp(learning_rate, decay, momentum, epsilon)
|
net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
|
||||||
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms)
|
mean_square_ms, moment_ms)
|
||||||
|
_ = net()
|
||||||
|
|
||||||
error = np.ones(shape=variable_np.shape) * 10e-6
|
error = np.ones(shape=variable_np.shape) * 10e-6
|
||||||
diff = variable_ms.asnumpy() - variable_np
|
diff = variable_ms.asnumpy() - variable_np
|
||||||
|
@ -132,24 +154,32 @@ def test_rmspropcenter():
|
||||||
mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
|
mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
|
||||||
moment_np = np.array([0.0, 0.0], dtype=np.float32)
|
moment_np = np.array([0.0, 0.0], dtype=np.float32)
|
||||||
|
|
||||||
variable_ms = Tensor(variable_np)
|
variable = Tensor(variable_np)
|
||||||
gradients_ms = Tensor(gradients_np)
|
gradients = Tensor(gradients_np)
|
||||||
mean_gradients_ms = Tensor(mean_gradients_np)
|
mean_gradients = Tensor(mean_gradients_np)
|
||||||
mean_square_ms = Tensor(mean_square_np)
|
mean_square = Tensor(mean_square_np)
|
||||||
moment_ms = Tensor(moment_np)
|
moment = Tensor(moment_np)
|
||||||
|
|
||||||
|
variable_ms = Parameter(initializer(variable, variable.shape), name='var')
|
||||||
|
gradients_ms = Parameter(initializer(gradients, gradients.shape), name='grad')
|
||||||
|
mean_gradients_ms = Parameter(initializer(mean_gradients, mean_gradients.shape), name='mg')
|
||||||
|
mean_square_ms = Parameter(initializer(mean_square, mean_square.shape), name='msr')
|
||||||
|
moment_ms = Parameter(initializer(moment, moment.shape), name='mom')
|
||||||
|
|
||||||
if centered:
|
if centered:
|
||||||
variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
|
variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
|
||||||
rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
|
rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
|
||||||
learning_rate, decay, momentum, epsilon)
|
learning_rate, decay, momentum, epsilon)
|
||||||
net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon)
|
net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
|
||||||
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms)
|
mean_square_ms, moment_ms)
|
||||||
|
_ = net()
|
||||||
else:
|
else:
|
||||||
variable_np, gradients_np, mean_square_np, moment_np = \
|
variable_np, gradients_np, mean_square_np, moment_np = \
|
||||||
rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
|
rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
|
||||||
learning_rate, decay, momentum, epsilon)
|
learning_rate, decay, momentum, epsilon)
|
||||||
net = NetRMSProp(learning_rate, decay, momentum, epsilon)
|
net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
|
||||||
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms)
|
mean_square_ms, moment_ms)
|
||||||
|
_ = net()
|
||||||
|
|
||||||
error = np.ones(shape=variable_np.shape) * 10e-6
|
error = np.ones(shape=variable_np.shape) * 10e-6
|
||||||
diff = variable_ms.asnumpy() - variable_np
|
diff = variable_ms.asnumpy() - variable_np
|
||||||
|
|
Loading…
Reference in New Issue