!1370 delete parallel end-to-end test cases

Merge pull request !1370 from yihuaijie/master
mindspore-ci-bot 2020-05-22 20:01:49 +08:00 committed by Gitee
commit d402b94476
32 changed files with 0 additions and 3614 deletions

View File

@@ -1,178 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, z):
out = self.add(x, z)
return self.relu(out)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = 1.0
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in strategy1[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def forward_mindspore_impl(self):
net = AddRelu()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
output_grad = Tensor(output_grads[self.out_id])
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
_ = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
_ = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_add_relu_input_256_64():
stra0 = (0, (2, 2), ())
stra1 = (0, (2, 2))
fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
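# Illustrative sketch, not part of the original test file: how get_parallel_blocks
# tiles an array under a sharding strategy. With strategy (2, 2) a (4, 4) array is
# cut into 4 blocks in row-major order over the device mesh, so device_id % 4 picks
# the local shard. The `_blocks_demo` helper below is hypothetical.
def _blocks_demo():
    fact = AddReluFactory(input_shape=(4, 4), strategy0=(0, (2, 2), ()), strategy1=(0, (2, 2)))
    arr = np.arange(16).reshape(4, 4).astype(np.float32)
    blocks = fact.get_parallel_blocks(arr, (2, 2))
    assert np.allclose(blocks[0], arr[0:2, 0:2])  # top-left shard (device 0)
    assert np.allclose(blocks[3], arr[2:4, 2:4])  # bottom-right shard (device 3)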

View File

@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_add_relu_parallel_4p.py>../../log/test_add_relu_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@@ -1,356 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from numpy import allclose
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore._checkparam import check_bool, twice
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class _Conv(Cell):
r"""Applies a N-D convolution over an input signal composed of several input
planes.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
pad_mode,
padding,
dilation,
group,
has_bias,
weight_init,
bias_init):
super(_Conv, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.stride = stride
self.pad_mode = pad_mode
self.padding = padding
self.dilation = dilation
self.group = group
self.has_bias = has_bias
if not (isinstance(in_channels, int) and in_channels > 0):
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed '
+ str(in_channels) + ', should be an int greater than 0.')
if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \
(not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
kernel_size[0] < 1 or kernel_size[1] < 1:
raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed '
+ str(self.kernel_size) + ', should be an int or a tuple of two ints, each equal to or greater than 1.')
if in_channels % group != 0:
raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
if out_channels % group != 0:
raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by '
'attr \'group\' of \'Conv2D\' Op.')
self.weight = Parameter(initializer(
weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')
if check_bool(has_bias):
self.bias = Parameter(initializer(
bias_init, [out_channels]), name='bias')
else:
if bias_init != 'zeros':
print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
self.bias = None
def construct(self, *inputs):
raise NotImplementedError
class Conv2d(_Conv):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
pad_mode='same',
padding=0,
dilation=1,
group=1,
has_bias=False,
weight_init='normal',
bias_init='zeros',
strategy=None):
kernel_size = twice(kernel_size)
super(Conv2d, self).__init__(
in_channels,
out_channels,
kernel_size,
stride,
pad_mode,
padding,
dilation,
group,
has_bias,
weight_init,
bias_init)
self.add = P.TensorAdd(strategy)
self.conv2d = P.Conv2D(out_channel=self.out_channels,
kernel_size=self.kernel_size,
mode=1,
pad_mode=self.pad_mode,
pad=self.padding,
stride=self.stride,
dilation=self.dilation,
group=self.group,
strategy=None)
self.bias_add = P.BiasAdd()
def construct(self, input1, input2):
x = self.add(input1, input2)
if self.has_bias:
return self.bias_add(self.conv2d(x, self.weight),
self.bias)
return self.conv2d(x, self.weight)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input1, input2, output_grad):
return grad_all_with_sens(self.network)(input1, input2, output_grad)
class Conv2dFactory:
def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias):
self.in_n, self.in_c, self.in_h, self.in_w = input_shape
self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape
self.stride = stride
self.pad_mode = pad_mode
self.padding = padding
self.dilation = dilation
self.group = group
self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1))
prefix = ""
input_size = 1
filter_size = 1
for s in input_shape:
prefix = prefix + str(s) + "_"
input_size = input_size * s
self.prefix = prefix
for s in filter_shape:
filter_size = filter_size * s
number_range1 = min(10, input_size)
number_range2 = min(10, filter_size)
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype(
np.float16)
self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype(
np.float16)
self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype(
np.float16)
self.has_bias = has_bias
if self.has_bias is True:
self.bias_np = np.arange(0, self.out_c).astype(np.float16)
self.out_shape = (128, 64, 56, 56)
out_size = 1
for s in self.out_shape:
out_size = out_size * s
number_range3 = min(10, out_size)
self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2,
self.out_shape).astype(np.float16)
self.x_id = device_id % 4
self.y_id = device_id % 4
self.out_strategy = self.strategy0[1]
self.out_id = device_id % 4
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_conv2d_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias)
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight)
out = net(input1, input2)
return out.asnumpy()
def forward_conv2d_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight,
strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_conv2d_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
output_grad = Tensor(self.output_grad_np)
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias,)
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight)
grad_net = Grad(net)
grad_net.set_train()
out_grad = grad_net(x, y, output_grad)
return out_grad
def grad_conv2d_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
weight = Tensor(self.weight_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad = Tensor(self.output_grad_np)
output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
output_grad1 = Tensor(output_grads[self.out_id])
if self.has_bias:
bias = Tensor(self.bias_np)
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=True, weight_init=weight,
bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
else:
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
kernel_size=(self.kernel_h, self.kernel_w),
stride=self.stride, pad_mode=self.pad_mode,
padding=self.padding, dilation=self.dilation,
group=self.group, has_bias=False, weight_init=weight,
strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_train()
grad_net.set_auto_parallel()
out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return out_grad
def forward_conv2d_cmp(self):
out_mindspore = self.forward_conv2d_mindspore_impl()
out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_conv2d_cmp(self):
input_grad_mindspore = self.grad_conv2d_mindspore_impl()
input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1])
assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001)
assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001)
def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_false():
fact = Conv2dFactory(input_shape=(128, 64, 112, 112),
filter_shape=(64, 64, 1, 1),
stride=2, pad_mode='valid', padding=0,
dilation=1, group=1, has_bias=False)
fact.forward_conv2d_cmp()
def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_false():
fact = Conv2dFactory(input_shape=(128, 64, 112, 112),
filter_shape=(64, 64, 1, 1),
stride=2, pad_mode='valid', padding=0,
dilation=1, group=1, has_bias=False)
fact.grad_conv2d_cmp()
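# Informal shape check derived from the parameters above: with pad_mode='valid',
# a 1x1 kernel and stride 2 on a 112x112 input give
#     (112 - 1) // 2 + 1 = 56
# per spatial dimension, matching the hard-coded out_shape (128, 64, 56, 56)
# (batch 128 and out_channels 64 are unchanged). The helper below is hypothetical.
def _valid_conv_out_size(size, kernel, stride):
    return (size - kernel) // stride + 1
assert _valid_conv_out_size(112, 1, 2) == 56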

View File

@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_conv2d_parallel_4p.py>../../log/test_conv2d_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@@ -1,36 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
export SLOG_PRINT_TO_STDOUT=1
source /root/miniconda3/bin/activate ci3.6
export RANK_SIZE=4
export RANK_TABLE_FILE=../../rank_table_4p.json
export RANK_ID=$1
export DEVICE_ID=$1
export HCCL_FLAG=1
export DEPLOY_MODE=0
export AICPU_FLAG=1
export DUMP_OP=1
export PYTHONPATH=../../../../../../../../mindspore:/usr/local/HiAI/runtime/python3.6/site-packages/topi.egg/:/usr/local/HiAI/runtime/python3.6/site-packages/te.egg/:/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/HiAI/runtime/lib64/libhccl.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so
export LD_LIBRARY_PATH=/usr/local/HiAI/runtime/lib64
export FE_FLAG=1
export PATH=/usr/local/HiAI/runtime/ccec_compiler/bin:$PATH
if [ $1 -eq 0 ];
then
export DUMP_GE_GRAPH=true
export ME_DRAW_GRAPH=1
fi

View File

@@ -1,120 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn import Dropout
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Net(Cell):
def __init__(self, keep_prob, seed0, seed1, strategy=None):
super(Net, self).__init__()
self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy)
def construct(self, input_):
x = self.drop(input_)
return x
# pylint: disable=comparison-with-itself
class DropoutFactory:
def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None):
size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(10, size)
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32)
self.keep_prob = keep_prob
self.seed0 = seed0
self.seed1 = seed1
self.strategy0 = strategy0
need_dev_num = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
self.x_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def d4_tensor_compare(self, input_, out_me):
[a, b, c, d] = input_.shape
for i in range(a):
for j in range(b):
for k in range(c):
for e in range(d):
if out_me[i, j, k, e] == 0:
assert True
else:
assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001)
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np)
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
net = Net(0.4, 0, 0, strategy=self.strategy0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
return out.asnumpy()
def forward_cmp(self):
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1])
self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel)
def test_reid_dropout_forward_seed_F32_64_512_8_8():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1)))
fact.forward_cmp()
def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat():
fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1)))
fact.forward_cmp()
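# Informal note on d4_tensor_compare above: Dropout with keep_prob=0.4 either zeroes
# an element or rescales it by 1 / keep_prob = 2.5, so each output value must be 0 or
# input * 2.5. A minimal stand-alone sketch of that invariant (hypothetical mask):
def _dropout_invariant_demo():
    src = np.array([1.0, 2.0, 3.0], np.float32)
    out = np.array([1.0, 0.0, 3.0], np.float32) * (1 / 0.4)  # second element dropped
    for o, ref in zip(out, src):
        assert o == 0 or np.allclose(o, ref * (1 / 0.4), 0.0001, 0.0001)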

View File

@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_dropout_parallel_4p.py>../../log/test_dropout_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@@ -1,154 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y):
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulAllgather(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulAllgather, self).__init__()
self.allgather = P.AllGather(group=group)
self.matmul = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce = P.AllReduce(group=group)
def construct(self, x, y):
x = self.allgather(x)
out = self.matmul(x, y)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, sens):
return grad_all_with_sens(self.network)(x, y, sens)
class MatmulAllgatherFactory:
def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra):
self.inputx = self.gen_value(inputx_shape, 10)
self.inputy = self.gen_value(inputy_shape, 20)
self.x_stra = x_stra
self.y_stra = y_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def gen_value(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulAllgather("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (64, 32)
inputy_shape = (32, 64)
fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4))
fact.grad_cmp()
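# Informal sketch (plain NumPy, hypothetical shapes) of the layout tested above:
# x is row-sharded with strategy (4, 1) and y column-sharded with (1, 4); AllGather
# rebuilds the full x, each device computes one column slice of x @ y, and AllReduce
# sums the per-device reduce_sum values back to the global result.
def _allgather_layout_demo():
    x = np.arange(8 * 4, dtype=np.float32).reshape(8, 4)
    y = np.arange(4 * 8, dtype=np.float32).reshape(4, 8)
    x_shards = np.split(x, 4, axis=0)              # one row block per device
    y_shards = np.split(y, 4, axis=1)              # one column block per device
    full_x = np.concatenate(x_shards, axis=0)      # what AllGather reconstructs
    per_device = [np.sum((full_x @ ys) ** 2) for ys in y_shards]
    assert np.allclose(sum(per_device), np.sum((x @ y) ** 2))  # what AllReduce sums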

View File

@@ -1,175 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MatmulSingle(Cell):
def __init__(self, transpose_a=False, transpose_b=False):
super(MatmulSingle, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
return out
class MatmulReduce(Cell):
def __init__(self, group, transpose_a=False, transpose_b=False):
super(MatmulReduce, self).__init__()
self.matmul1 = P.MatMul(transpose_a, transpose_b)
self.allreduce1 = P.AllReduce(group=group)
self.matmul2 = P.MatMul(transpose_a, transpose_b)
self.pow = P.Pow()
self.reduce_sum = P.ReduceSum()
self.allreduce2 = P.AllReduce(group=group)
def construct(self, x, y, z):
out = self.matmul1(x, y)
out = self.allreduce1(out)
out = self.matmul2(out, z)
out = self.pow(out, 2.0)
out = self.reduce_sum(out, None)
out = self.allreduce2(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, z, sens):
return grad_all_with_sens(self.network)(x, y, z, sens)
class MatmulReduceFactory:
def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra):
self.inputx = self.gen_value(inputx_shape, 10)
self.inputy = self.gen_value(inputy_shape, 20)
self.inputz = self.gen_value(inputz_shape, 30)
self.x_stra = x_stra
self.y_stra = y_stra
self.z_stra = z_stra
stra_size = 1
for s in x_stra:
stra_size = stra_size * s
self.stra_size = stra_size
def gen_value(self, input_shape, delta):
size = 1
for s in input_shape:
size = size * s
number_range = min(100, size)
input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
return input_np
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl_single(self):
x = Tensor(self.inputx)
y = Tensor(self.inputy)
z = Tensor(self.inputz)
sens = Tensor(1.0, dtype=ms.float32)
net = MatmulSingle()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_mindspore_impl_reduce(self):
inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
inputzs = self.get_parallel_blocks(self.inputz, self.z_stra)
x = Tensor(inputxs[device_id % self.stra_size])
y = Tensor(inputys[device_id % self.stra_size])
z = Tensor(inputzs[device_id % self.stra_size])
repeat_num = device_num / self.stra_size
v = self.stra_size * repeat_num * repeat_num * repeat_num
sens = Tensor(1.0 / v, dtype=ms.float32)
net = MatmulReduce("hccl_world_group")
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, z, sens)
return input_grad
def grad_cmp(self):
single_results = self.grad_mindspore_impl_single()
reduce_results = self.grad_mindspore_impl_reduce()
single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
reduce_result0 = reduce_results[0].asnumpy()
single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
reduce_result1 = reduce_results[1].asnumpy()
single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size]
reduce_result2 = reduce_results[2].asnumpy()
assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001)
def test_reduce_grad():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4))
fact.grad_cmp()
def test_reduce_grad_repeat():
inputx_shape = (32, 64)
inputy_shape = (64, 64)
inputz_shape = (64, 32)
fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2))
fact.grad_cmp()
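# Informal sketch (plain NumPy, hypothetical shapes) of why allreduce1 above restores
# the full product: with x sharded (1, 4) along columns and y sharded (4, 1) along
# rows, each device holds one term of a block matrix product and
#     sum_i x_i @ y_i == x @ y.
def _allreduce_matmul_demo():
    x = np.arange(64, dtype=np.float32).reshape(8, 8)
    y = np.arange(64, dtype=np.float32).reshape(8, 8)
    x_shards = np.split(x, 4, axis=1)
    y_shards = np.split(y, 4, axis=0)
    partial_sum = sum(xs @ ys for xs, ys in zip(x_shards, y_shards))
    assert np.allclose(partial_sum, x @ y)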

View File

@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_allgather_4p.py>../../log/test_allgather_4p_log$i.log 2>&1 &
cd ..
done

View File

@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_allreduce_4p.py>../../log/test_allreduce_4p_log$i.log 2>&1 &
cd ..
done

View File

@@ -1,206 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class L2normalize(Cell):
def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None):
super(L2normalize, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.l2norm = P.L2Normalize(axis, epsilon, strategy1)
def construct(self, x, y):
out = self.add(x, y)
out = self.l2norm(out)
return out
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class L2normalizeFactory:
def __init__(self, input_shape, axis, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.axis = axis
self.epsilon = 1e-4
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = L2normalize(self.axis, self.epsilon)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = L2normalize(self.axis, self.epsilon)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_l2normalize_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.forward_cmp()
def test_reid_l2normalize_grad_input_128_512():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
fact.grad_cmp()
def test_reid_l2normalize_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.forward_cmp()
def test_reid_l2normalize_grad_input_128_512_repeat():
input_shape = (128, 512)
axis = 0
fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
fact.grad_cmp()
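# Note on the *_repeat cases above: strategy (1, 2) yields only 2 shards while
# device_num is 4, so the test indexes shards with device_id % 2, e.g.
#     [d % 2 for d in range(4)] -> [0, 1, 0, 1]
# i.e. devices 0/2 check block 0 and devices 1/3 check block 1 (each shard is
# effectively replicated on two devices).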

View File

@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_l2normalize_parallel_4p.py>../../log/test_l2normalize_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@@ -1 +0,0 @@
Log files for auto-parallel end-to-end test cases

View File

@@ -1,195 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class AddRelu(Cell):
def __init__(self, strategy0=None, strategy1=None):
super(AddRelu, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.relu = P.ReLU(strategy=strategy1)
def construct(self, x, y):
out = self.add(x, y)
out = self.relu(out)
return out
class NetWithLoss(Cell):
def __init__(self, network, strategy2=None):
super(NetWithLoss, self).__init__()
self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2)
self.network = network
def construct(self, x, y, b):
predict = self.network(x, y)
return self.loss(predict, b)[0]
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, b):
return grad_all(self.network)(x, y, b)
class AddReluFactory:
def __init__(self, input_shape, strategy0, strategy1, strategy2):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = input_shape
self.target_shape = target_shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(10, target_size)
self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype(
np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
self.strategy2 = strategy2
out_strategy = strategy1[1]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = AddRelu()
net_with_loss = NetWithLoss(net)
grad_net = Grad(net_with_loss)
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad)
input_grads.append(input_grad)
return input_grads
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
net_with_loss = NetWithLoss(net, strategy2=self.strategy2)
grad_net = Grad(net_with_loss)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grads = []
for i in range(0, 3):
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad1])
input_grads.append(input_grad)
return input_grads
def grad_cmp(self):
input_grad_mindspores = self.grad_mindspore_impl()
input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl()
for i in range(0, len(input_grad_mindspores)):
input_grad_mindspore = input_grad_mindspores[i]
input_grad_mindspore_parallel = input_grad_mindspore_parallels[i]
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy",
input_grad_blocks_0[self.x_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy",
input_grad_blocks_1[self.y_id])
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy",
input_grad_mindspore_parallel0)
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy",
input_grad_mindspore_parallel1)
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_loss_grad_input_128_512():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()
def test_reid_loss_grad_input_128_512_stridesplit():
input_shape = (128, 512)
fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)),
strategy2=(0, (4, 1), (4, 1)))
fact.grad_cmp()

View File

@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_loss_parallel_4p.py>../../log/test_loss_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@@ -1,329 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from numpy import allclose
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Matmul(Cell):
def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
super(Matmul, self).__init__()
self.add = P.TensorAdd(strategy=strategy1)
self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0)
def construct(self, x, w, z):
out = self.add(x, z)
return self.matmul(out, w)
class BatchMatMul(Cell):
def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
super(BatchMatMul, self).__init__()
self.add = P.TensorAdd(strategy=strategy1)
self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0)
def construct(self, x, w, z):
out = self.add(x, z)
return self.batchmatmul(out, w)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, inputa, inputb, inputz, output_grad):
gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad)
return gout
class BatchmatmulFactory:
def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_):
self.strategy = strategy
self.strategy_ = strategy_
inputa_size = 1
inputb_size = 1
prefix = ""
for s in inputa_shape:
prefix = prefix + str(s) + "_"
inputa_size = inputa_size * s
prefix = prefix + "and"
for s in inputb_shape:
prefix = prefix + str(s) + "_"
inputb_size = inputb_size * s
number_rangea = min(1000, inputa_size)
number_rangeb = min(1000, inputb_size)
self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype(
np.float32)
self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype(
np.float32)
self.inputz = np.zeros(self.inputa.shape).astype(np.float32)
self.transpose_a = transpose_a
self.transpose_b = transpose_b
out_shape = []
device_matrix = []
out_strategy = []
if transpose_a:
temp = inputa_shape[-1]
inputa_shape[-1] = inputa_shape[-2]
inputa_shape[-2] = temp
if transpose_b:
temp = inputb_shape[-1]
inputb_shape[-1] = inputb_shape[-2]
inputb_shape[-2] = temp
if len(inputa_shape) >= len(inputb_shape):
out_shape = list(inputa_shape)
out_shape[-1] = inputb_shape[-1]
else:
out_shape = list(inputb_shape)
out_shape[-2] = inputa_shape[-2]
strategy1 = list(self.strategy[1])
strategy2 = list(self.strategy[2])
if transpose_a:
temp = strategy1[-1]
strategy1[-1] = strategy1[-2]
strategy1[-2] = temp
if transpose_b:
temp = strategy2[-1]
strategy2[-1] = strategy2[-2]
strategy2[-2] = temp
if len(strategy1) >= len(strategy2):
out_strategy = strategy1.copy()
out_strategy[-1] = strategy2[-1]
else:
out_strategy = strategy2.copy()
out_strategy[-2] = strategy1[-2]
device_matrix = out_strategy.copy()
device_matrix.insert(-1, strategy1[-1])
self.out_strategy = out_strategy
need_dev_num = 1
for s in device_matrix:
need_dev_num = need_dev_num * s
self.need_dev_num = need_dev_num
self.device_matrix = device_matrix
out_size = 1
for s in out_shape:
out_size = out_size * s
number_range = min(1000, out_size)
self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype(
np.float32)
device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix)
x_index = device_index[:-1].copy()
if transpose_a:
temp = x_index[-1]
x_index[-1] = x_index[-2]
x_index[-2] = temp
y_index = device_index[:-3].copy()
y_index.append(device_index[-2])
y_index.append(device_index[-1])
if transpose_b:
temp = y_index[-1]
y_index[-1] = y_index[-2]
y_index[-2] = temp
out_index = device_index[:-2].copy()
out_index.append(device_index[-1])
print(device_matrix)
print(device_index)
need_dev_num_ = 1
for s in strategy_[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num_
self.y_id = self.list_to_id(y_index, self.strategy[2])
self.out_id = self.list_to_id(out_index, self.out_strategy)
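# Block assignment: the device matrix is the output strategy with the contracted
# dimension's split (strategy1[-1]) inserted before the last axis, so a flat device
# id decodes into coordinates for the batch dims, the row split, the contracted
# split and the column split.  y_id and out_id are re-encoded from the matching
# subsets of those coordinates (with the last two swapped back when transpose_a /
# transpose_b are set), while x_id follows the TensorAdd strategy (strategy_)
# because the add is applied to x before the matmul.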
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def id_to_list(self, id_, shape):
"""
`shape` gives the upper bound of each dimension, e.g. (2, 4, 8)
"""
result = []
r = id_
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result
def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result
def forward_mindspore_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b)
else:
matmul = Matmul(self.transpose_a, self.transpose_b)
matmul.set_train()
out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz))
return out_me.asnumpy()
def forward_mindspore_parallel_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
else:
matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])  # the slice index needs to be derived from the device matrix
z1 = Tensor(zs[self.x_id])
matmul.set_train()
matmul.set_auto_parallel()
out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1])
return out_me.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b)
else:
matmul = Matmul(self.transpose_a, self.transpose_b)
net_me = Grad(matmul)
net_me.set_train()
out_grad_me = Tensor(self.output_grad_np)
out_grad = net_me(x, y, z, out_grad_me)
return out_grad
def grad_mindspore_parallel_impl(self):
if len(self.inputa.shape) > 2:
matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
else:
matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
x = Tensor(self.inputa)
y = Tensor(self.inputb)
z = Tensor(self.inputz)
out_grad_me = Tensor(self.output_grad_np)
xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(xs[self.x_id])  # the slice index needs to be derived from the device matrix
y1 = Tensor(ys[self.y_id])
z1 = Tensor(zs[self.x_id])
out_grad1 = Tensor(out_grads[self.out_id])
net_me = Grad(matmul)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net_me.set_auto_parallel()
net_me.set_train()
out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1],
parallel_inputs_run=[x1, y1, z1, out_grad1])
return out_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy)
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1])
input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2])
input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1])
assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001)
assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001)
assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001)
def test_reid_batchmatmul_inputa_128_512_inputb_2000_512():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.forward_cmp()
def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.grad_cmp()
def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.forward_cmp()
def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution():
inputa = [128, 512]
inputb = [2000, 512]
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
fact.grad_cmp()
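The helpers shared by every factory in this change are easiest to see on a tiny example. The sketch below (plain NumPy, no MindSpore required) condenses get_parallel_blocks and id_to_list from the file above and checks, on a 4x4 array with strategy (2, 2), that device 3 decodes to coordinate [1, 1] of the device matrix and therefore owns the bottom-right block; only the example data is new.

import numpy as np

def get_parallel_blocks(input_, strategy):
    # split along axis 0 into strategy[0] pieces, then each piece along axis 1, ...
    blocks = [input_]
    for axis, stra in enumerate(strategy):
        temp = []
        while blocks:
            temp.extend(np.split(blocks.pop(0), stra, axis=axis))
        blocks = temp
    return blocks

def id_to_list(id_, shape):
    # decode a flat device id into row-major coordinates inside `shape`
    result, r = [], id_
    for i in range(len(shape)):
        v = 1
        for j in range(i + 1, len(shape)):
            v *= shape[j]
        result.append(r // v)
        r %= v
    return result

x = np.arange(16).reshape(4, 4)
blocks = get_parallel_blocks(x, (2, 2))        # strategy (2, 2) -> four (2, 2) blocks
assert id_to_list(3, [2, 2]) == [1, 1]         # device 3 sits at row 1, col 1 of the device matrix
assert np.array_equal(blocks[3], x[2:, 2:])    # ...and therefore owns the bottom-right block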

View File

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_matmul_parallel_4p.py >../../log/test_matmul_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@ -1,213 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input1, input2, output_grad):
return grad_all_with_sens(self.network)(input1, input2, output_grad)
class Max(Cell):
def __init__(self, axis, keep_dims, strategy0=None, strategy1=None):
super(Max, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1)
self.axis = axis
def construct(self, input1, input2):
out = self.add(input1, input2)
return self.reduce_max(out, self.axis)
class MaxFactory:
def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1):
self.strategy0 = strategy0
self.strategy1 = strategy1
self.axis = axis
self.keep_dims = keep_dims
input_size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s) + "_"
input_size = input_size * s
number_range = min(1000, input_size)
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = self.input_np1.copy()
self.out_grad_np = None
out_shape = list(input_shape)
out_shape.pop(axis)
out_size = input_size / input_shape[axis]
number_range_ = min(1000, out_size)
self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype(
np.float32)
out_strategy = list(strategy1[1])
out_strategy.pop(axis)
self.out_strategy = out_strategy
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in out_strategy:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
out = net(input1, input2)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
input1 = Tensor(self.input_np1)
input2 = Tensor(self.input_np2)
out_grad = Tensor(self.out_grad_np)
net = Max(axis=self.axis, keep_dims=self.keep_dims)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(input1, input2, out_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy)
out_grad = Tensor(output_grads[self.out_id])
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(xs[self.x_id])
y1 = Tensor(ys[self.y_id])
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad],
parallel_inputs_run=[x1, y1, out_grad])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
print(out_mindspore)
print(out_mindspore_parallel)
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_max_forward_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_256_64():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
strategy1=(0, (4, 1)))
fact.grad_cmp()
def test_reid_max_forward_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_128_64_32_32():
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
strategy1=(0, (2, 1, 2, 1)))
fact.grad_cmp()
def test_reid_max_forward_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.forward_cmp()
def test_reid_max_grad_input_256_64_repeat():
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.grad_cmp()
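MaxFactory pops the reduced axis from strategy1 to build out_strategy because a ReduceMax over an unsplit axis commutes with slicing along the split axis, which is exactly what forward_cmp compares. A plain-NumPy check of that property for the (256, 64) case with strategy (4, 1) (a sketch, independent of MindSpore):

import numpy as np

x = np.arange(256 * 64, dtype=np.float32).reshape(256, 64)
full = x.max(axis=1)                                     # single-device result
for rank, block in enumerate(np.split(x, 4, axis=0)):    # strategy (4, 1): axis 0 split 4 ways
    # each device reduces only its slice and reproduces its slice of the full output
    assert np.array_equal(block.max(axis=1), np.split(full, 4)[rank])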

View File

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_max_parallel_4p.py>../../log/test_max_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_mul_softmax_parallel_4p.py>../../log/test_mul_softmax_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@ -1,200 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class MulSoftmax(Cell):
def __init__(self, strategy0=None, strategy1=None, axis=0):
super(MulSoftmax, self).__init__()
self.mul = P.Mul(strategy=strategy0)
self.softmax = P.Softmax(axis=axis, strategy=strategy1)
def construct(self, x, z):
out = self.mul(x, z)
return self.softmax(out)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class MulSoftmaxFactory:
def __init__(self, input_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = 1.0
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.strategy0 = strategy0
self.strategy1 = strategy1
need_dev_num = 1
need_dev_num_ = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
for s in strategy1[1]:
need_dev_num_ = need_dev_num_ * s
self.x_id = device_id % need_dev_num
self.y_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num_
def forward_mindspore_impl(self):
net = MulSoftmax()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = MulSoftmax()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
output_grad = Tensor(output_grads[self.out_id])
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_train()
grad_net.set_auto_parallel()
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(self.input_np2, ms.float32)
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
parallel_inputs_run=[x1, y1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0,
self.strategy0[1])  # here X1, of the two Mul inputs, is not broadcast while X2 is
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_mul_softmax_input_128x64():
stra0 = (0, (1, 4), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_mul_softmax_input_128x64():
stra0 = (0, (1, 4), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
@pytest.mark.reid_forward
def test_reid_mul_softmax_input_128x64_all_to_all():
stra0 = (0, (4, 1), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_mul_softmax_input_128x64_all_to_all():
stra0 = (0, (4, 1), ())
stra1 = (0, (1, 4))
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
fact.grad_cmp()
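Both MulSoftmax cases keep strategy1 on axis 1 while the softmax is taken over axis 0 (the default used here), so every column is normalised entirely on one device and forward_cmp can compare column-blocks of the single-device output with the parallel output. The same invariant in plain NumPy, with a hand-written softmax as a stand-in (the tests multiply by the scalar 1.0; the sketch uses 2.0 only to make the Mul visible):

import numpy as np

def softmax(x, axis=0):
    # numerically stable column-wise softmax for this sketch
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

x = np.random.randn(128, 64).astype(np.float32)
full = softmax(2.0 * x, axis=0)                          # Mul by a broadcast scalar, then Softmax
for rank, block in enumerate(np.split(x, 4, axis=1)):    # strategy (1, 4): axis 1 split 4 ways
    assert np.allclose(softmax(2.0 * block, axis=0), np.split(full, 4, axis=1)[rank], 0.0001, 0.0001)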

View File

@ -1,147 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Onehot(Cell):
def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
super(Onehot, self).__init__()
self.onehot = P.OneHot(axis, strategy=strategy)
self.depth = depth
self.on_value = Tensor(on_value, ms.float32)
self.off_value = Tensor(off_value, ms.float32)
def construct(self, indices):
return self.onehot(indices, self.depth, self.on_value, self.off_value)
class OneHotFactory:
def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None):
size = 1
prefix = ""
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(10, size)
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32)
self.depth = depth
self.on_value = on_value
self.off_value = off_value
self.axis = axis
self.dtype = dtype
self.strategy0 = strategy0
need_dev_num = 1
for s in strategy0[1]:
need_dev_num = need_dev_num * s
self.x_id = device_id % need_dev_num
self.out_id = device_id % need_dev_num
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
# Note: leftover from an AddRelu-based test template; AddRelu and the attributes it
# references (output_grad_np, input_np1, input_np2) are not defined in this file, and
# no test below calls this method.
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np1)
y = Tensor(self.input_np2, ms.float32)
net = AddRelu()
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def forward_mindspore_impl(self):
indices = Tensor(self.input_np)
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value)
out = net(indices)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np)
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
net = Onehot(axis=self.axis,
depth=self.depth,
on_value=self.on_value,
off_value=self.off_value, strategy=self.strategy0)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
return out.asnumpy()
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1])
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
def test_reid_onehot_forward_int32_128_depth131072():
fact = OneHotFactory(input_shape=(128,),
depth=131072,
on_value=1.000000,
off_value=0.000000,
axis=-1,
dtype="float32",
strategy0=(0, (2,)))
fact.forward_cmp()
def test_reid_onehot_forward_int32_131072_depth127():
fact = OneHotFactory(input_shape=(131072,),
depth=127,
on_value=1.000000,
off_value=0.000000,
axis=-1,
dtype="float32",
strategy0=(0, (4,)))
fact.forward_cmp()
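OneHotFactory splits only the 1-D index tensor, so each device encodes its own slice of the indices and produces the matching row-block of the full one-hot matrix (the depth axis is never split). A NumPy sketch of that property, using np.eye as a stand-in for P.OneHot and a small depth instead of the tests' 131072/127:

import numpy as np

indices = np.arange(128) % 10                            # mirrors input_np: values in [0, number_range)
depth = 16
full = np.eye(depth, dtype=np.float32)[indices]          # shape (128, depth), axis=-1 one-hot
for rank, block in enumerate(np.split(indices, 4)):      # strategy (4,): the index axis split 4 ways
    assert np.array_equal(np.eye(depth, dtype=np.float32)[block],
                          np.split(full, 4, axis=0)[rank])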

View File

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_onehot_parallel_4p.py>../../log/test_onehot_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@ -1,206 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class PReLU(Cell):
def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None):
super(PReLU, self).__init__()
self.add = P.TensorAdd(strategy=strategy1_)
self.prelu = P.PReLU(strategy=strategy_)
self.channel = channel
def construct(self, x, z, w):
out = self.add(x, z)
return self.prelu(out, w)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, input_, z, w, output_grad):
return grad_all_with_sens(self.network)(input_, z, w, output_grad)
class PReLUFactory:
def __init__(self, input_shape, strategy):
n, c = input_shape[:2]
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32)
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
input_shape).astype(np.float32)
self.channel = c
self.weight = np.array([np.float32(0.25)] * c)
self.strategy = strategy
def forward_mindspore_impl(self):
net = PReLU(channel=self.channel, w=self.weight)
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
out = net(x, z, w)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
block_id = device_id % len(inputs)
x1 = Tensor(inputs[block_id])
z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
w1 = Tensor(self.weight)
out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1])
return out.asnumpy()
def grad_mindspore_impl(self):
output_grad = Tensor(self.output_grad_np)
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
net = PReLU(channel=self.channel, w=self.weight)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, z, w, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1])
block_id = device_id % len(output_grads)
output_grad = Tensor(output_grads[block_id])
x = Tensor(self.input_np)
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
w = Tensor(self.weight)
net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
x1 = Tensor(inputs[block_id])
z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
w1 = Tensor(self.weight)
input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad],
parallel_inputs_run=[x1, z1, w1, output_grad])
return input_grad
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1])
block_id = device_id % len(out_blocks)
assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore2 = input_grad_mindspore[2].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy()
input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1])
input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1])
block_id = device_id % len(input_grad_blocks)
assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001)
assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_grad
def test_reid_prelu_input_128x64x112x112_repeat():
stra = (0, (1, 1, 2, 1), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_prelu_input_128x64x112x112_repeat():
stra = (0, (1, 1, 2, 1), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.grad_cmp()
@pytest.mark.reid_grad
def test_reid_prelu_input_128x64x112x112_mix():
stra = (0, (2, 1, 1, 2), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.forward_cmp()
@pytest.mark.reid_grad
def test_reid_grad_prelu_input_128x64x112x112_mix():
stra = (0, (2, 1, 1, 2), (1))
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
fact.grad_cmp()
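Both PReLU strategies leave the channel axis unsplit, so every device keeps the full per-channel slope vector and PReLU on a block equals the corresponding block of the full output. A NumPy sketch of the forward rule (out = x where x > 0, else w[c] * x) under a split of a non-channel axis; the shapes are shrunk from (128, 64, 112, 112) and the batch axis is used for the split here, but any non-channel split behaves the same way:

import numpy as np

def prelu(x, w):
    # per-channel PReLU on NCHW data: one slope per channel, broadcast over N, H, W
    return np.where(x > 0, x, w.reshape(1, -1, 1, 1) * x)

x = np.random.randn(8, 4, 6, 6).astype(np.float32)
w = np.full(4, 0.25, dtype=np.float32)                   # matches the tests' 0.25 initial slope
full = prelu(x, w)
for rank, block in enumerate(np.split(x, 2, axis=0)):    # split the batch axis across 2 devices
    assert np.array_equal(prelu(block, w), np.split(full, 2, axis=0)[rank])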

View File

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_prelu_parallel_4p.py >../../log/test_prelu_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@ -1,252 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from numpy import allclose as allclose_nparray
import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class GradScalar(Cell):
def __init__(self, network):
super(GradScalar, self).__init__()
self.network = network
self.sens = Tensor([1.0], dtype=ms.float32)
def construct(self, x, y):
return grad_all_with_sens(self.network)(x, y, self.sens)
class ReduceMean(Cell):
def __init__(self, keep_dims, axis, strategy0=None, strategy1=None):
super(ReduceMean, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1)
self.axis = axis
def construct(self, x, y):
out = self.add(x, y)
return self.reduce_mean(out, self.axis)
class ReduceMeanFactory:
def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
self.keep_dims = keep_dims
self.axis = axis
target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.array([1.0], dtype=np.float32)
if len(target_shape) > 0:
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype(
np.float32) + 1.0
self.shape = target_shape
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = []
axis_ = list(axis)
if axis_[0] == -1:
axis_[0] = len(input_shape) - 1
for i in range(0, len(input_shape)):
if i in axis_:
if keep_dims:
out_strategy.append(1)
else:
out_strategy.append(strategy1[1][i])
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
block_id = device_id % need_dev_num0
device_index = self.id_to_list(block_id, self.strategy1[1])
print(device_index)
for i in axis:
device_index[i] = 0
print(device_index)
self.out_id = self.list_to_id(device_index, self.out_strategy)
print(self.out_id)
def id_to_list(self, id_, shape):
result = []
r = id_
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result
def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
out_grad = Tensor(self.output_grad_np)
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, out_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_reducemean_input_64x16():
fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)),
strategy1=(0, (4,)))
fact.forward_cmp()
def test_grad_reid_reducemean_input_64x16():
fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)),
strategy1=(0, (4,)))
fact.grad_cmp()
def test_reid_reducemean_input_64x128x32x32():
fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
fact.forward_cmp()
def test_grad_reid_reducemean_input_64x128x32x32():
fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
fact.grad_cmp()
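In the 1-D ReduceMean case the reduced axis itself is split four ways, so no device sees the whole reduction and the framework has to combine the partial results across the device group (presumably with an all-reduce style step). Because the blocks are equally sized, the global mean is just the average of the per-device means; a short NumPy check of that identity for the 64*16 input:

import numpy as np

x = np.arange(64 * 16, dtype=np.float32)
blocks = np.split(x, 4)                                  # strategy (4,): the reduce axis is split
local_means = np.array([b.mean() for b in blocks])       # what each device can compute on its own
assert np.allclose(local_means.mean(), x.mean())         # equal block sizes => averaging recovers the mean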

View File

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_reducemean_parallel_4p.py>../../log/test_reducemean_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@ -1,206 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import pytest
from numpy import allclose as allclose_nparray
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class Reshape(Cell):
def __init__(self, target_shape, strategy0=None, strategy1=None):
super(Reshape, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.reshape = P.Reshape(strategy=strategy1)
self.shape = tuple(target_shape)
def construct(self, input1, input2):
x = self.add(input1, input2)
return self.reshape(x, self.shape)
class ReshapeFactory:
def __init__(self, input_shape, target_shape, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.target_shape = target_shape
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = [1] * len(target_shape)
out_strategy[0] = strategy1[1][0]
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
self.out_id = device_id % need_dev_num1
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
def forward_reshape_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = Reshape(self.target_shape)
out = net(x, y)
return out.asnumpy()
def forward_reshape_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
def grad_reshape_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = Reshape(self.target_shape)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_reshape_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_reshape_cmp(self):
out_mindspore = self.forward_reshape_mindspore_impl()
out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_reshape_cmp(self):
input_grad_mindspore = self.grad_reshape_mindspore_impl()
input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
@pytest.mark.reid_forward
def test_reid_reshape_input_128x512x7x7_target_128x25088():
fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1)))
fact.forward_reshape_cmp()
def test_reid_reshape_grad_input_128x512x7x7_target_128x25088():
fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1)))
fact.grad_reshape_cmp()
@pytest.mark.reid_forward
def test_reid_reshape_input_128x64_target_128x64x1x1():
fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.forward_reshape_cmp()
@pytest.mark.reid_grad
def test_reid_reshape_grad_input_128x64_target_128x64x1x1():
fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)),
strategy1=(0, (2, 1)))
fact.grad_reshape_cmp()
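ReshapeFactory keeps only the leading-axis split in out_strategy because both test cases preserve the leading dimension through the reshape; in that situation reshaping each leading-axis block gives exactly the corresponding block of the reshaped whole. A NumPy check with the first test case's shapes (a sketch, no MindSpore involved):

import numpy as np

x = np.arange(128 * 512 * 7 * 7, dtype=np.float32).reshape(128, 512, 7, 7)
full = x.reshape(128, 25088)                             # the single-device Reshape target
for rank, block in enumerate(np.split(x, 4, axis=0)):    # strategy (4, 1, 1, 1): 32 samples per device
    assert np.array_equal(block.reshape(32, 25088), np.split(full, 4, axis=0)[rank])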

View File

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_reshape_parallel_4p.py>../../log/test_reshape_parallel_4p_log$i.log 2>&1 &
cd ..
done

View File

@ -1,235 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from numpy import allclose as allclose_nparray
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens
device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"
def setup_module():
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
context.set_context(mode=context.GRAPH_MODE)
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
distributedTool.init()
distributedTool.create_group("0-3", [0, 1, 2, 3])
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
def teardown_module():
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
class Net(Cell):
def __init__(self, perm_in, strategy0=None, strategy1=None):
super(Net, self).__init__()
self.add = P.TensorAdd(strategy=strategy0)
self.transpose = P.Transpose(strategy=strategy1)
self.perm_in = perm_in
def construct(self, x, y):
out = self.add(x, y)
return self.transpose(out, self.perm_in)
class Grad(Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
def construct(self, x, y, output_grad):
return grad_all_with_sens(self.network)(x, y, output_grad)
class TransposeFactory:
def __init__(self, input_shape, perm_in, strategy0, strategy1):
prefix = ""
size = 1
for s in input_shape:
prefix = prefix + str(s)
size = size * s
self.prefix = prefix
number_range = min(1000, size)
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
np.float32)
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
np.float32)
target_shape = self.input_np1.transpose(perm_in).shape
target_size = 1
for s in target_shape:
target_size = target_size * s
number_range = min(1000, target_size)
self.target_shape = target_shape
self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
target_shape).astype(np.float32)
self.perm_in = perm_in
self.strategy0 = strategy0
self.strategy1 = strategy1
out_strategy = []
for i in perm_in:
out_strategy.append(strategy1[1][i])
self.out_strategy = out_strategy
need_dev_num0 = 1
need_dev_num1 = 1
for s in strategy0[1]:
need_dev_num0 = need_dev_num0 * s
for s in out_strategy:
need_dev_num1 = need_dev_num1 * s
self.x_id = device_id % need_dev_num0
self.y_id = device_id % need_dev_num0
        device_index = self.id_to_list(device_id % need_dev_num1,
                                       self.strategy1[1])  # decode the rank into its block index in the pre-transpose layout
device_index_transpose = []
for i in perm_in:
device_index_transpose.append(device_index[i])
self.out_id = self.list_to_id(device_index_transpose, self.out_strategy)
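        # Worked example of the mapping above (editorial, not part of the original file): with
        # strategy1 dims (2, 2) and perm_in (1, 0), rank 1 decodes to input block index [0, 1];
        # permuting gives [1, 0] under out_strategy (2, 2), so list_to_id([1, 0], (2, 2)) == 2,
        # i.e. rank 1 holds output block 2 of the transposed result.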
def get_parallel_blocks(self, input_, strategy):
blocks = [input_]
i = 0
for stra in strategy:
temp = []
while len(blocks) > 0:
block = blocks.pop(0)
temp.extend(np.split(block, stra, axis=i))
blocks.extend(temp)
i += 1
return blocks
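    # Editorial illustration (not part of the original file): for a (4, 4) array and strategy
    # (2, 2), get_parallel_blocks splits along axis 0 into [top, bottom], then splits each of
    # those along axis 1, so it returns the four (2, 2) blocks in row-major order
    # [top-left, top-right, bottom-left, bottom-right]; x_id, y_id and out_id index that order.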
def id_to_list(self, id_, shape):
result = []
r = id_
for i in range(0, len(shape)):
v = 1
for j in range(i + 1, len(shape)):
v = v * shape[j]
result.append(r // v)
r = r % v
return result
def list_to_id(self, id_list, shape):
result = 0
for i in range(0, len(id_list)):
v = 1
for j in range(i + 1, len(id_list)):
v = v * shape[j]
result = result + id_list[i] * v
return result
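    # Editorial illustration (not part of the original file): id_to_list and list_to_id are
    # inverse mixed-radix conversions over the device mesh, e.g. id_to_list(6, [2, 2, 2])
    # returns [1, 1, 0] and list_to_id([1, 1, 0], [2, 2, 2]) returns 6.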
def forward_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
net = Net(self.perm_in)
out = net(x, y)
return out.asnumpy()
def forward_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
net.set_auto_parallel()
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
return out.asnumpy()
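        # Editorial note: the full-shape tensors are fed through parallel_inputs_compile and the
        # per-rank slices through parallel_inputs_run; forward_transpose_cmp then compares this
        # rank's block of the single-device output against the value returned here.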
def grad_mindspore_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
net = Net(self.perm_in)
grad_net = Grad(net)
grad_net.set_train()
input_grad = grad_net(x, y, output_grad)
return input_grad
def grad_mindspore_parallel_impl(self):
x = Tensor(self.input_np1)
y = Tensor(self.input_np2)
output_grad = Tensor(self.output_grad_np)
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
x1 = Tensor(inputs_x[self.x_id])
y1 = Tensor(inputs_y[self.y_id])
output_grad1 = Tensor(outgrads[self.out_id])
net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
grad_net = Grad(net)
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
grad_net.set_auto_parallel()
grad_net.set_train()
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
parallel_inputs_run=[x1, y1, output_grad1])
return input_grad
def forward_transpose_cmp(self):
out_mindspore = self.forward_mindspore_impl()
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
def grad_transpose_cmp(self):
input_grad_mindspore = self.grad_mindspore_impl()
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
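    # Editorial note: both comparison helpers above slice the single-device result with
    # get_parallel_blocks and check only this rank's block against the parallel run, using
    # the relative/absolute tolerances passed to allclose.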

def test_reid_transpose_input_256x512_output_512x256_perm_1x0():
    fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
    fact.forward_transpose_cmp()

def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0():
    fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
    fact.grad_transpose_cmp()

def test_reid_transpose_input_512x256_output_256x512_perm_1x0():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.forward_transpose_cmp()

def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.grad_transpose_cmp()

def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
    fact.forward_transpose_cmp()

def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
    fact.grad_transpose_cmp()
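
# Note on the two "_repeat" cases above (editorial illustration, not part of the original
# file): with 4 ranks but strategy dims (2, 1), only two distinct blocks exist, so the
# modulo in x_id/y_id/out_id maps ranks 0 and 2 to block 0 and ranks 1 and 3 to block 1,
# i.e. [r % 2 for r in range(4)] == [0, 1, 0, 1] and the data repeats across rank pairs.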

View File

@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
for((i=0;i<4;i++));
do
rm -rf device$i
mkdir device$i
cd device$i
mkdir output
source ../../dist_env_4p.sh $i
env >log$i.log
pytest -s ../test_transpose_parallel_4p.py>../../log/test_transpose_parallel_4p_log$i.log 2>&1 &
cd ..
done
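
Both per-device launcher scripts in this change (this one and the reshape variant earlier) follow the same pattern: create a clean device$i working directory with an output subdirectory, source dist_env_4p.sh to set up that rank's environment, and run the test file in the background with its output redirected to a per-rank log. As a rough illustration only, a Python equivalent is sketched below; it exports just RANK_ID (the one variable the deleted tests read directly), whereas the real environment setup lives in dist_env_4p.sh, so treat it as an assumption-laden sketch rather than a drop-in replacement.

# Hypothetical Python counterpart of the per-device launcher above (editorial sketch).
import os
import shutil
import subprocess

for i in range(4):
    workdir = f"device{i}"
    shutil.rmtree(workdir, ignore_errors=True)              # rm -rf device$i
    os.makedirs(os.path.join(workdir, "output"))            # mkdir device$i; mkdir output
    env = dict(os.environ, RANK_ID=str(i))                  # the tests read RANK_ID from the environment
    log = open(os.path.join("..", "log", f"test_transpose_parallel_4p_log{i}.log"), "w")
    subprocess.Popen(["pytest", "-s", "../test_transpose_parallel_4p.py"],
                     cwd=workdir, env=env, stdout=log, stderr=subprocess.STDOUT)  # backgrounded, like the trailing '&'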