forked from mindspore-Ecosystem/mindspore
add some allreduce st test cases in pynative mode
parent 837d6e71de
commit 6ae6a27688
@@ -16,12 +16,11 @@
"""test bert thor performance with 8p on mlperf dataset"""

import os
from multiprocessing import Process
from multiprocessing import Process, Queue
import pytest
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
import mindspore.dataset as dataset
from mindspore import dtype as mstype
from mindspore.ops import operations as P
import mindspore.communication.management as D
@@ -31,7 +30,6 @@ from mindspore.context import ParallelMode
MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"

np.random.seed(1)
dataset.config.set_seed(1)
os.environ['GLOG_v'] = str(2)

class AllReduceNet(nn.Cell):
@@ -42,7 +40,7 @@ class AllReduceNet(nn.Cell):
    def construct(self, x):
        return self.all_reduce(x)

def train_allreduce_8p(device_id, device_num):
def train_allreduce_8p(q, device_id, device_num):
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend", device_id=device_id)
@@ -58,7 +56,7 @@ def train_allreduce_8p(device_id, device_num):
    input_x = np.ones([32, 255, 255, 3]).astype(np.float32)
    except_output = input_x * 8
    output = net(Tensor(input_x, mstype.float32))
    assert np.allclose(output.asnumpy(), except_output)
    q.put(np.allclose(output.asnumpy(), except_output))

@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@@ -67,9 +65,10 @@ def train_allreduce_8p(device_id, device_num):
def test_pynative_hccl_8p():
    device_num = 8
    process = []
    q = Queue()
    for i in range(device_num):
        device_id = i
        process.append(Process(target=train_allreduce_8p, args=(device_id, device_num)))
        process.append(Process(target=train_allreduce_8p, args=(q, device_id, device_num)))

    for i in range(device_num):
        process[i].start()
@@ -79,6 +78,10 @@ def test_pynative_hccl_8p():
    for i in range(device_num):
        process[i].join()

    # check result
    for i in range(device_num):
        assert q.get()

    for i in range(device_num):
        os.system("rm -rf " + str(i))
@@ -0,0 +1,99 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""test hccl allreduce performance with 8p"""

import os
from multiprocessing import Process, Queue
import pytest
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import dtype as mstype
from mindspore.ops import operations as P
import mindspore.communication.management as D
from mindspore import context
from mindspore.context import ParallelMode

MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_table_8p.json"

np.random.seed(1)
os.environ['GLOG_v'] = str(2)

class AllReduceNet(nn.Cell):
    def __init__(self):
        super(AllReduceNet, self).__init__()
        self.mul = P.Mul()
        self.all_reduce = P.AllReduce()
        self.add = P.Add()

    def construct(self, x):
        x = self.mul(x, 2)
        y1 = Tensor(np.array([[2, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 2]])).astype(np.float32)
        z = self.add(x, y1)
        z = self.all_reduce(z)
        y2 = Tensor(np.array([[-16, -16, -16, -16], [-16, -16, -16, -16], [-16, -16, -16, -16]])).astype(np.float32)
        out = self.add(z, y2)
        out = self.all_reduce(out)
        out = self.mul(out, 2)
        return out

def train_allreduce_8p(q, device_id, device_num):
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend", device_id=device_id)
    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH
    os.environ['RANK_ID'] = str(device_id)
    os.environ['RANK_SIZE'] = str(device_num)
    D.init()
    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=False,
                                      device_num=device_num)

    net = AllReduceNet()
    input_x = np.ones([3, 4]).astype(np.float32)
    output = net(Tensor(input_x, mstype.float32))
    q.put(output)

@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_pynative_hccl_allreduce_8p():
    device_num = 8
    process = []
    q = Queue()
    for i in range(device_num):
        device_id = i
        process.append(Process(target=train_allreduce_8p, args=(q, device_id, device_num)))

    for i in range(device_num):
        process[i].start()

    print("Waiting for all subprocesses done...")

    for i in range(device_num):
        process[i].join()

    # check result
    for i in range(device_num):
        expect_output = [[256, 256, 256, 256], [256, 256, 256, 256], [256, 256, 256, 256]]
        output = Tensor(q.get())
        assert np.allclose(output.asnumpy(), expect_output)

    for i in range(device_num):
        os.system("rm -rf " + str(i))

    print("End training...")
@@ -0,0 +1,24 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import os
import pytest

@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_pynative_nccl_allreduce():
    return_code = os.system("mpirun -n 8 pytest -s test_pynative_nccl_allreduce.py")
    assert return_code == 0
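This wrapper test passes only when every rank launched by mpirun succeeds, because os.system returns a non-zero status when the wrapped pytest command fails. A trivial illustration of that exit-status behaviour (using standard shell commands, not part of the test itself):

import os

# os.system returns 0 only when the wrapped command exits successfully.
assert os.system("true") == 0
assert os.system("false") != 0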
@@ -0,0 +1,55 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""test nccl allreduce performance with 8p"""

import os
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import dtype as mstype
from mindspore.ops import operations as P
from mindspore import context
from mindspore.communication.management import init

np.random.seed(1)
os.environ['GLOG_v'] = str(2)
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
init()

class AllReduceNet(nn.Cell):
    def __init__(self):
        super(AllReduceNet, self).__init__()
        self.mul = P.Mul()
        self.all_reduce = P.AllReduce()
        self.add = P.Add()

    def construct(self, x):
        x = self.mul(x, 2)
        y1 = Tensor(np.array([[2, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 2]])).astype(np.float32)
        z = self.add(x, y1)
        z = self.all_reduce(z)
        y2 = Tensor(np.array([[-16, -16, -16, -16], [-16, -16, -16, -16], [-16, -16, -16, -16]])).astype(np.float32)
        out = self.add(z, y2)
        out = self.all_reduce(out)
        out = self.mul(out, 2)
        return out

def test_pynative_nccl_allreduce_8p():
    net = AllReduceNet()
    input_x = np.ones([3, 4]).astype(np.float32)
    expect_output = [[256, 256, 256, 256], [256, 256, 256, 256], [256, 256, 256, 256]]
    output = net(Tensor(input_x, mstype.float32))
    assert np.allclose(output.asnumpy(), expect_output)