forked from mindspore-Ecosystem/mindspore
Add GPU NCCL CI test cases.
This commit is contained in:
parent
7d406e8e6c
commit
4cd237eee4
|
@ -0,0 +1,44 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import os
|
||||
import pytest
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_nccl_lenet():
    """Run test_nccl_lenet.py through mpirun with 8 processes.

    The test passes only when the value returned by ``os.system`` for the
    launch command is 0.
    """
    launch_cmd = "mpirun -n 8 pytest -s test_nccl_lenet.py"
    status = os.system(launch_cmd)
    assert status == 0
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_nccl_all_reduce_op():
    """Run test_nccl_all_reduce_op.py through mpirun with 8 processes.

    The test passes only when the value returned by ``os.system`` for the
    launch command is 0.
    """
    launch_cmd = "mpirun -n 8 pytest -s test_nccl_all_reduce_op.py"
    status = os.system(launch_cmd)
    assert status == 0
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_nccl_all_gather_op():
    """Run test_nccl_all_gather_op.py through mpirun with 8 processes.

    The test passes only when the value returned by ``os.system`` for the
    launch command is 0.
    """
    launch_cmd = "mpirun -n 8 pytest -s test_nccl_all_gather_op.py"
    status = os.system(launch_cmd)
    assert status == 0
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_nccl_reduce_scatter_op():
    """Run test_nccl_reduce_scatter_op.py through mpirun with 8 processes.

    The test passes only when the value returned by ``os.system`` for the
    launch command is 0.
    """
    launch_cmd = "mpirun -n 8 pytest -s test_nccl_reduce_scatter_op.py"
    status = os.system(launch_cmd)
    assert status == 0
|
|
@ -20,7 +20,7 @@ import mindspore.context as context
|
|||
from mindspore.common.initializer import initializer
|
||||
from mindspore.common.parameter import Parameter
|
||||
from mindspore.communication.management import init, NCCL_WORLD_COMM_GROUP, get_rank, get_group_size
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='GPU', enable_dynamic_memory=False)
|
||||
|
||||
init('nccl')
|
||||
rank = get_rank()
|
||||
|
|
|
@ -27,7 +27,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
|||
init('nccl')
|
||||
|
||||
epoch = 2
|
||||
total = 50000
|
||||
total = 5000
|
||||
batch_size = 32
|
||||
mini_batch = total // batch_size
|
||||
|
||||
|
@ -94,3 +94,4 @@ def test_lenet_nccl():
|
|||
with open("ms_loss.txt", "w") as fo2:
|
||||
fo2.write("loss:")
|
||||
fo2.write(str(losses[-5:]))
|
||||
assert(losses[-1] < 0.01)
|
||||
|
|
|
@ -62,8 +62,6 @@ def test_ReduceScatter():
|
|||
expect1 = np.ones([1, 1, 3, 3]).astype(np.float32) * 0.01 * size
|
||||
diff1 = output[1].asnumpy() - expect1
|
||||
error1 = np.ones(shape=expect1.shape) * 1.0e-5
|
||||
print(expect1)
|
||||
print(output[1])
|
||||
assert np.all(diff1 < error1)
|
||||
assert (output[1].shape() == expect1.shape)
|
||||
|
||||
|
|
Loading…
Reference in New Issue