Add GPU NCCL CI test cases.

2020-04-16 09:56:43 +08:00 · 2020-04-16 09:56:43 +08:00 · 4cd237eee4
parent 7d406e8e6c
commit 4cd237eee4
4 changed files with 47 additions and 4 deletions
--- a/tests/st/nccl/test_nccl_all.py
+++ b/tests/st/nccl/test_nccl_all.py
@ -0,0 +1,44 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import pytest
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_single
+def test_nccl_lenet():  # Wrapper test: runs test_nccl_lenet.py through mpirun with 8 processes.
+    return_code = os.system("mpirun -n 8 pytest -s test_nccl_lenet.py")  # script path is relative, so it resolves against the current working directory
+    assert(return_code == 0)  # any non-zero status from the shell command fails this test
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_single
+def test_nccl_all_reduce_op():  # Wrapper test: runs test_nccl_all_reduce_op.py through mpirun with 8 processes.
+    return_code = os.system("mpirun -n 8 pytest -s test_nccl_all_reduce_op.py")  # script path is relative, so it resolves against the current working directory
+    assert(return_code == 0)  # any non-zero status from the shell command fails this test
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_single
+def test_nccl_all_gather_op():  # Wrapper test: runs test_nccl_all_gather_op.py through mpirun with 8 processes.
+    return_code = os.system("mpirun -n 8 pytest -s test_nccl_all_gather_op.py")  # script path is relative, so it resolves against the current working directory
+    assert(return_code == 0)  # any non-zero status from the shell command fails this test
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_single
+def test_nccl_reduce_scatter_op():  # Wrapper test: runs test_nccl_reduce_scatter_op.py through mpirun with 8 processes.
+    return_code = os.system("mpirun -n 8 pytest -s test_nccl_reduce_scatter_op.py")  # script path is relative, so it resolves against the current working directory
+    assert(return_code == 0)  # any non-zero status from the shell command fails this test
--- a/tests/st/nccl/test_nccl_all_reduce_op.py
+++ b/tests/st/nccl/test_nccl_all_reduce_op.py
@ -20,7 +20,7 @@ import mindspore.context as context
 from mindspore.common.initializer import initializer
 from mindspore.common.parameter import Parameter
 from mindspore.communication.management import init, NCCL_WORLD_COMM_GROUP, get_rank, get_group_size
-context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
+context.set_context(mode=context.GRAPH_MODE, device_target='GPU', enable_dynamic_memory=False)

 init('nccl')
 rank = get_rank()
--- a/tests/st/nccl/test_nccl_lenet.py
+++ b/tests/st/nccl/test_nccl_lenet.py
@ -27,7 +27,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
 init('nccl')

 epoch = 2
-total = 50000
+total = 5000
 batch_size = 32
 mini_batch = total // batch_size

@ -94,3 +94,4 @@ def test_lenet_nccl():
    with open("ms_loss.txt", "w") as fo2:
        fo2.write("loss:")
        fo2.write(str(losses[-5:]))
+    assert(losses[-1] < 0.01)
--- a/tests/st/nccl/test_nccl_reduce_scatter_op.py
+++ b/tests/st/nccl/test_nccl_reduce_scatter_op.py
@ -62,8 +62,6 @@ def test_ReduceScatter():
    expect1 = np.ones([1, 1, 3, 3]).astype(np.float32) * 0.01 * size
    diff1 = output[1].asnumpy() - expect1
    error1 = np.ones(shape=expect1.shape) * 1.0e-5
-    print(expect1)
-    print(output[1])
    assert np.all(diff1 < error1)
    assert (output[1].shape() == expect1.shape)