!6232 Fix GPU heterogeneous bug

Merge pull request !6232 from baihuawei/embedding
This commit is contained in:
mindspore-ci-bot 2020-09-16 16:49:39 +08:00 committed by Gitee
commit af5ebcf1a9
3 changed files with 90 additions and 51 deletions

View File

@ -196,7 +196,9 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
} }
} }
if (need_sync) { if (need_sync) {
tensor->set_device_address(device_address); if (AnfAlgo::IsParameterWeight(input_node->cast<ParameterPtr>())) {
tensor->set_device_address(device_address);
}
MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(device_address);
if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0),
LongToSize(tensor->data().nbytes()), tensor->data_type(), LongToSize(tensor->data().nbytes()), tensor->data_type(),

View File

@ -19,6 +19,8 @@ import pytest
import mindspore.context as context import mindspore.context as context
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import Tensor from mindspore import Tensor
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
from mindspore.common.api import ms_function from mindspore.common.api import ms_function
from mindspore.ops.operations import _quant_ops as Q from mindspore.ops.operations import _quant_ops as Q
@ -26,13 +28,15 @@ context.set_context(device_target='GPU')
class Net(nn.Cell): class Net(nn.Cell):
def __init__(self): def __init__(self, mean, variance):
super(Net, self).__init__() super(Net, self).__init__()
self.mean = mean
self.variance = variance
self.op = Q.BatchNormFold(momentum=0.9, freeze_bn=10) self.op = Q.BatchNormFold(momentum=0.9, freeze_bn=10)
@ms_function @ms_function
def construct(self, x, mean, variance, current_step): def construct(self, x, current_step):
a, b, c, d = self.op(x, mean, variance, current_step) a, b, c, d = self.op(x, self.mean, self.variance, current_step)
return a, b, c, d return a, b, c, d
@ -52,16 +56,17 @@ def np_result(x, mean, var, momentum, epsilon):
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_batchnorm_fold(): def test_batchnorm_fold():
net = Net()
c = 64 c = 64
x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32') x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
mean = np.random.uniform(1, 10, size=[c]).astype('float32') mean = np.random.uniform(1, 10, size=[c]).astype('float32')
variance = np.random.uniform(1, 10, size=[c]).astype('float32') variance = np.random.uniform(1, 10, size=[c]).astype('float32')
current_step = np.array([0]).astype('int32') current_step = np.array([0]).astype('int32')
ms_mean = Tensor(mean) ms_mean_t = Tensor(mean)
ms_var = Tensor(variance) ms_var_t = Tensor(variance)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var, ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
Tensor(current_step)) ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
net = Net(ms_mean, ms_var)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
expect1, expect2, expect3, expect4, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12) expect1, expect2, expect3, expect4, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
@ -76,16 +81,17 @@ def test_batchnorm_fold():
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_batchnorm_fold2(): def test_batchnorm_fold2():
net = Net()
c = 64 c = 64
x = np.random.uniform(1, 10, size=[3, c, 512, 512]).astype('float32') x = np.random.uniform(1, 10, size=[3, c, 512, 512]).astype('float32')
mean = np.random.uniform(1, 10, size=[c]).astype('float32') mean = np.random.uniform(1, 10, size=[c]).astype('float32')
variance = np.random.uniform(1, 10, size=[c]).astype('float32') variance = np.random.uniform(1, 10, size=[c]).astype('float32')
current_step = np.array([0]).astype('int32') current_step = np.array([0]).astype('int32')
ms_mean = Tensor(mean) ms_mean_t = Tensor(mean)
ms_var = Tensor(variance) ms_var_t = Tensor(variance)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var, ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
Tensor(current_step)) ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
net = Net(ms_mean, ms_var)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
expect1, expect2, expect3, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12) expect1, expect2, expect3, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_mean.asnumpy(), expect1, rtol=1.e-7, atol=1.e-5)
assert np.allclose(batch_var.asnumpy(), expect2, rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_var.asnumpy(), expect2, rtol=1.e-7, atol=1.e-5)
@ -98,16 +104,17 @@ def test_batchnorm_fold2():
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
def test_batchnorm_fold_freeze(): def test_batchnorm_fold_freeze():
net = Net()
c = 64 c = 64
x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32') x = np.random.uniform(1, 10, size=[3, c, 32, 32]).astype('float32')
mean = np.random.uniform(1, 10, size=[c]).astype('float32') mean = np.random.uniform(1, 10, size=[c]).astype('float32')
variance = np.random.uniform(1, 10, size=[c]).astype('float32') variance = np.random.uniform(1, 10, size=[c]).astype('float32')
current_step = np.array([10]).astype('int32') current_step = np.array([10]).astype('int32')
ms_mean = Tensor(mean) ms_mean_t = Tensor(mean)
ms_var = Tensor(variance) ms_var_t = Tensor(variance)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), ms_mean, ms_var, ms_mean = Parameter(initializer(ms_mean_t, ms_mean_t.shape), name='mean')
Tensor(current_step)) ms_var = Parameter(initializer(ms_var_t, ms_var_t.shape), name='var')
net = Net(ms_mean, ms_var)
batch_mean, batch_var, delay_mean, delay_std = net(Tensor(x), Tensor(current_step))
_, _, _, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12) _, _, _, _, expect5, expect6 = np_result(x, mean, variance, 0.9, 1e-12)
assert np.allclose(batch_mean.asnumpy(), np.zeros_like(mean), rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_mean.asnumpy(), np.zeros_like(mean), rtol=1.e-7, atol=1.e-5)
assert np.allclose(batch_var.asnumpy(), np.ones_like(mean), rtol=1.e-7, atol=1.e-5) assert np.allclose(batch_var.asnumpy(), np.ones_like(mean), rtol=1.e-7, atol=1.e-5)

View File

@ -19,35 +19,48 @@ import pytest
import mindspore.context as context import mindspore.context as context
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import Tensor from mindspore import Tensor
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
from mindspore.ops import operations as P from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="GPU") context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
class NetCenteredRMSProp(nn.Cell): class NetCenteredRMSProp(nn.Cell):
def __init__(self, lr, decay, momentum, epsilon): def __init__(self, lr, decay, momentum, epsilon, var, g, mg, rms, mom):
super(NetCenteredRMSProp, self).__init__() super(NetCenteredRMSProp, self).__init__()
self.rms_opt = P.ApplyCenteredRMSProp() self.rms_opt = P.ApplyCenteredRMSProp()
self.lr = lr self.lr = lr
self.decay = decay self.decay = decay
self.momentum = momentum self.momentum = momentum
self.epsilon = epsilon self.epsilon = epsilon
self.var = var
self.g = g
self.mg = mg
self.rms = rms
self.mom = mom
def construct(self, var, g, mg, rms, mom): def construct(self):
return self.rms_opt(var, mg, rms, mom, g, self.lr, self.decay, self.momentum, self.epsilon) return self.rms_opt(self.var, self.mg, self.rms, self.mom, self.g, self.lr, self.decay, self.momentum,
self.epsilon)
class NetRMSProp(nn.Cell): class NetRMSProp(nn.Cell):
def __init__(self, lr, decay, momentum, epsilon): def __init__(self, lr, decay, momentum, epsilon, var, g, mg, rms, mom):
super(NetRMSProp, self).__init__() super(NetRMSProp, self).__init__()
self.lr = lr self.lr = lr
self.decay = decay self.decay = decay
self.momentum = momentum self.momentum = momentum
self.epsilon = epsilon self.epsilon = epsilon
self.var = var
self.g = g
self.mg = mg
self.rms = rms
self.mom = mom
self.rms_opt = P.ApplyRMSProp() self.rms_opt = P.ApplyRMSProp()
def construct(self, var, g, mg, rms, mom): def construct(self):
return self.rms_opt(var, rms, mom, self.lr, g, self.decay, self.momentum, self.epsilon) return self.rms_opt(self.var, self.rms, self.mom, self.lr, self.g, self.decay, self.momentum, self.epsilon)
def rmsprop_numpy(variable, gradients, mean_square, moment, def rmsprop_numpy(variable, gradients, mean_square, moment,
@ -67,6 +80,7 @@ def rmspropcented_numpy(variable, gradients, mean_gradients, mean_square, moment
variable = variable - moment variable = variable - moment
return variable, gradients, mean_gradients, mean_square, moment return variable, gradients, mean_gradients, mean_square, moment
@pytest.mark.level0 @pytest.mark.level0
@pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard @pytest.mark.env_onecard
@ -79,25 +93,33 @@ def test_rmsprop():
mean_square_np = np.array([epsilon, epsilon], dtype=np.float32) mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
moment_np = np.array([0.0, 0.0], dtype=np.float32) moment_np = np.array([0.0, 0.0], dtype=np.float32)
variable_ms = Tensor(variable_np) variable = Tensor(variable_np)
gradients_ms = Tensor(gradients_np) gradients = Tensor(gradients_np)
mean_gradients_ms = Tensor(mean_gradients_np) mean_gradients = Tensor(mean_gradients_np)
mean_square_ms = Tensor(mean_square_np) mean_square = Tensor(mean_square_np)
moment_ms = Tensor(moment_np) moment = Tensor(moment_np)
variable_ms = Parameter(initializer(variable, variable.shape), name='var')
gradients_ms = Parameter(initializer(gradients, gradients.shape), name='grad')
mean_gradients_ms = Parameter(initializer(mean_gradients, mean_gradients.shape), name='mg')
mean_square_ms = Parameter(initializer(mean_square, mean_square.shape), name='msr')
moment_ms = Parameter(initializer(moment, moment.shape), name='mom')
if centered: if centered:
variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \ variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np, rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
learning_rate, decay, momentum, epsilon) learning_rate, decay, momentum, epsilon)
net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon) net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms) mean_square_ms, moment_ms)
_ = net()
else: else:
variable_np, gradients_np, mean_square_np, moment_np = \ variable_np, gradients_np, mean_square_np, moment_np = \
rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np, rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
learning_rate, decay, momentum, epsilon) learning_rate, decay, momentum, epsilon)
net = NetRMSProp(learning_rate, decay, momentum, epsilon) net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms) mean_square_ms, moment_ms)
_ = net()
error = np.ones(shape=variable_np.shape) * 10e-6 error = np.ones(shape=variable_np.shape) * 10e-6
diff = variable_ms.asnumpy() - variable_np diff = variable_ms.asnumpy() - variable_np
@ -132,24 +154,32 @@ def test_rmspropcenter():
mean_square_np = np.array([epsilon, epsilon], dtype=np.float32) mean_square_np = np.array([epsilon, epsilon], dtype=np.float32)
moment_np = np.array([0.0, 0.0], dtype=np.float32) moment_np = np.array([0.0, 0.0], dtype=np.float32)
variable_ms = Tensor(variable_np) variable = Tensor(variable_np)
gradients_ms = Tensor(gradients_np) gradients = Tensor(gradients_np)
mean_gradients_ms = Tensor(mean_gradients_np) mean_gradients = Tensor(mean_gradients_np)
mean_square_ms = Tensor(mean_square_np) mean_square = Tensor(mean_square_np)
moment_ms = Tensor(moment_np) moment = Tensor(moment_np)
variable_ms = Parameter(initializer(variable, variable.shape), name='var')
gradients_ms = Parameter(initializer(gradients, gradients.shape), name='grad')
mean_gradients_ms = Parameter(initializer(mean_gradients, mean_gradients.shape), name='mg')
mean_square_ms = Parameter(initializer(mean_square, mean_square.shape), name='msr')
moment_ms = Parameter(initializer(moment, moment.shape), name='mom')
if centered: if centered:
variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \ variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np = \
rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np, rmspropcented_numpy(variable_np, gradients_np, mean_gradients_np, mean_square_np, moment_np,
learning_rate, decay, momentum, epsilon) learning_rate, decay, momentum, epsilon)
net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon) net = NetCenteredRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms) mean_square_ms, moment_ms)
_ = net()
else: else:
variable_np, gradients_np, mean_square_np, moment_np = \ variable_np, gradients_np, mean_square_np, moment_np = \
rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np, rmsprop_numpy(variable_np, gradients_np, mean_square_np, moment_np,
learning_rate, decay, momentum, epsilon) learning_rate, decay, momentum, epsilon)
net = NetRMSProp(learning_rate, decay, momentum, epsilon) net = NetRMSProp(learning_rate, decay, momentum, epsilon, variable_ms, gradients_ms, mean_gradients_ms,
_ = net(variable_ms, gradients_ms, mean_gradients_ms, mean_square_ms, moment_ms) mean_square_ms, moment_ms)
_ = net()
error = np.ones(shape=variable_np.shape) * 10e-6 error = np.ones(shape=variable_np.shape) * 10e-6
diff = variable_ms.asnumpy() - variable_np diff = variable_ms.asnumpy() - variable_np