forked from mindspore-Ecosystem/mindspore
!1370 delete parallel end-to-end test cases
Merge pull request !1370 from yihuaijie/master
commit d402b94476
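All of the deleted test files below follow the same pattern: build a single-device reference network and a semi-auto-parallel network, slice the full inputs into per-device blocks with a shared get_parallel_blocks helper, then compare outputs and gradients block by block. As a standalone illustration only (not part of the diff), here is a minimal sketch of that block-splitting logic, assuming nothing beyond NumPy:

import numpy as np

def get_parallel_blocks(input_, strategy):
    # Split input_ along each axis into strategy[axis] equal slices;
    # the flat index of a block is the rank that owns that slice.
    blocks = [input_]
    for axis, split in enumerate(strategy):
        temp = []
        for block in blocks:
            temp.extend(np.split(block, split, axis=axis))
        blocks = temp
    return blocks

# Example: a (4, 8) array with strategy (2, 2) yields 4 blocks of shape (2, 4);
# rank r would feed blocks[r % 4] as its local slice.
blocks = get_parallel_blocks(np.arange(32).reshape(4, 8), (2, 2))
assert len(blocks) == 4 and blocks[0].shape == (2, 4)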
@@ -1,178 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
import pytest

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class AddRelu(Cell):
    def __init__(self, strategy0=None, strategy1=None):
        super(AddRelu, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.relu = P.ReLU(strategy=strategy1)

    def construct(self, x, z):
        out = self.add(x, z)
        return self.relu(out)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class AddReluFactory:
    def __init__(self, input_shape, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
            np.float32)
        self.input_np2 = 1.0
        self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
                                         input_shape).astype(np.float32)
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        need_dev_num = 1
        need_dev_num_ = 1
        for s in strategy0[1]:
            need_dev_num = need_dev_num * s
        for s in strategy1[1]:
            need_dev_num_ = need_dev_num_ * s
        self.x_id = device_id % need_dev_num
        self.y_id = device_id % need_dev_num
        self.out_id = device_id % need_dev_num_

    def forward_mindspore_impl(self):
        net = AddRelu()
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(self.input_np2, ms.float32)
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        output_grad = Tensor(self.output_grad_np)
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        net = AddRelu()
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
        output_grad = Tensor(output_grads[self.out_id])
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2, ms.float32)
        net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(self.input_np2, ms.float32)
        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
                              parallel_inputs_run=[x1, y1, output_grad])
        return input_grad

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        _ = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        _ = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)


@pytest.mark.reid_forward
def test_reid_add_relu_input_256_64():
    stra0 = (0, (2, 2), ())
    stra1 = (0, (2, 2))
    fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
    fact.forward_cmp()


@pytest.mark.reid_grad
def test_reid_grad_add_relu_input_256_64():
    stra0 = (0, (2, 2), ())
    stra1 = (0, (2, 2))
    fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1)
    fact.grad_cmp()
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_add_relu_parallel_4p.py>../../log/test_add_relu_parallel_4p_log$i.log 2>&1 &
    cd ..
done
@@ -1,356 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
from numpy import allclose

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore._checkparam import check_bool, twice
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class _Conv(Cell):
    r"""Applies a N-D convolution over an input signal composed of several input
    planes.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 pad_mode,
                 padding,
                 dilation,
                 group,
                 has_bias,
                 weight_init,
                 bias_init):
        super(_Conv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad_mode = pad_mode
        self.padding = padding
        self.dilation = dilation
        self.group = group
        self.has_bias = has_bias
        if not (isinstance(in_channels, int) and in_channels > 0):
            raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed '
                             + str(in_channels) + ', should be a int and greater than 0.')
        if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \
                (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
                kernel_size[0] < 1 or kernel_size[1] < 1:
            raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed '
                             + str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.')
        if in_channels % group != 0:
            raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by '
                             'attr \'group\' of \'Conv2D\' Op.')
        if out_channels % group != 0:
            raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by '
                             'attr \'group\' of \'Conv2D\' Op.')

        self.weight = Parameter(initializer(
            weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')

        if check_bool(has_bias):
            self.bias = Parameter(initializer(
                bias_init, [out_channels]), name='bias')
        else:
            if bias_init != 'zeros':
                print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.")
            self.bias = None

    def construct(self, *inputs):
        raise NotImplementedError


class Conv2d(_Conv):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 has_bias=False,
                 weight_init='normal',
                 bias_init='zeros',
                 strategy=None):
        kernel_size = twice(kernel_size)
        super(Conv2d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            has_bias,
            weight_init,
            bias_init)
        self.add = P.TensorAdd(strategy)
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group,
                               strategy=None)
        self.bias_add = P.BiasAdd()

    def construct(self, input1, input2):
        x = self.add(input1, input2)
        if self.has_bias:
            return self.bias_add(self.conv2d(x, self.weight),
                                 self.bias)
        return self.conv2d(x, self.weight)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, input1, input2, output_grad):
        return grad_all_with_sens(self.network)(input1, input2, output_grad)


class Conv2dFactory:
    def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias):
        self.in_n, self.in_c, self.in_h, self.in_w = input_shape
        self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape
        self.stride = stride
        self.pad_mode = pad_mode
        self.padding = padding
        self.dilation = dilation
        self.group = group
        self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1))
        prefix = ""
        input_size = 1
        filter_size = 1
        for s in input_shape:
            prefix = prefix + str(s) + "_"
            input_size = input_size * s
        self.prefix = prefix
        for s in filter_shape:
            filter_size = filter_size * s
        number_range1 = min(10, input_size)
        number_range2 = min(10, filter_size)
        self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype(
            np.float16)
        self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype(
            np.float16)
        self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype(
            np.float16)
        self.has_bias = has_bias
        if self.has_bias is True:
            self.bias_np = np.arange(0, self.out_c).astype(np.float16)

        self.out_shape = (128, 64, 56, 56)
        out_size = 1
        for s in self.out_shape:
            out_size = out_size * s
        number_range3 = min(10, out_size)
        self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2,
                                         self.out_shape).astype(np.float16)
        self.x_id = device_id % 4
        self.y_id = device_id % 4
        self.out_strategy = self.strategy0[1]
        self.out_id = device_id % 4

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_conv2d_mindspore_impl(self):
        input1 = Tensor(self.input_np1)
        input2 = Tensor(self.input_np2)
        weight = Tensor(self.weight_np)
        if self.has_bias:
            bias = Tensor(self.bias_np)
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=True, weight_init=weight,
                         bias_init=bias)
        else:
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=False, weight_init=weight)
        out = net(input1, input2)
        return out.asnumpy()

    def forward_conv2d_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        weight = Tensor(self.weight_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        if self.has_bias:
            bias = Tensor(self.bias_np)
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=True, weight_init=weight,
                         bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
        else:
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=False, weight_init=weight,
                         strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_conv2d_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        weight = Tensor(self.weight_np)
        output_grad = Tensor(self.output_grad_np)
        if self.has_bias:
            bias = Tensor(self.bias_np)
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=True, weight_init=weight,
                         bias_init=bias,)
        else:
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=False, weight_init=weight)

        grad_net = Grad(net)
        grad_net.set_train()
        out_grad = grad_net(x, y, output_grad)
        return out_grad

    def grad_conv2d_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        weight = Tensor(self.weight_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad = Tensor(self.output_grad_np)
        output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        output_grad1 = Tensor(output_grads[self.out_id])
        if self.has_bias:
            bias = Tensor(self.bias_np)
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=True, weight_init=weight,
                         bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))
        else:
            net = Conv2d(in_channels=self.in_c, out_channels=self.out_c,
                         kernel_size=(self.kernel_h, self.kernel_w),
                         stride=self.stride, pad_mode=self.pad_mode,
                         padding=self.padding, dilation=self.dilation,
                         group=self.group, has_bias=False, weight_init=weight,
                         strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1]))

        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_train()
        grad_net.set_auto_parallel()
        out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                            parallel_inputs_run=[x1, y1, output_grad1])
        return out_grad

    def forward_conv2d_cmp(self):
        out_mindspore = self.forward_conv2d_mindspore_impl()
        out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)

    def grad_conv2d_cmp(self):
        input_grad_mindspore = self.grad_conv2d_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1])
        assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001)
        assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001)


def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true():
    fact = Conv2dFactory(input_shape=(128, 64, 112, 112),
                         filter_shape=(64, 64, 1, 1),
                         stride=2, pad_mode='valid', padding=0,
                         dilation=1, group=1, has_bias=False)
    fact.forward_conv2d_cmp()


def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true():
    fact = Conv2dFactory(input_shape=(128, 64, 112, 112),
                         filter_shape=(64, 64, 1, 1),
                         stride=2, pad_mode='valid', padding=0,
                         dilation=1, group=1, has_bias=False)
    fact.grad_conv2d_cmp()
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_conv2d_parallel_4p.py>../../log/test_conv2d_parallel_4p_log$i.log 2>&1 &
    cd ..
done
@@ -1,36 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

export SLOG_PRINT_TO_STDOUT=1
source /root/miniconda3/bin/activate ci3.6
export RANK_SIZE=4
export RANK_TABLE_FILE=../../rank_table_4p.json
export RANK_ID=$1
export DEVICE_ID=$1
export HCCL_FLAG=1
export DEPLOY_MODE=0
export AICPU_FLAG=1
export DUMP_OP=1
export PYTHONPATH=../../../../../../../../mindspore:/usr/local/HiAI/runtime/python3.6/site-packages/topi.egg/:/usr/local/HiAI/runtime/python3.6/site-packages/te.egg/:/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/
export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/HiAI/runtime/lib64/libhccl.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so
export LD_LIBRARY_PATH=/usr/local/HiAI/runtime/lib64
export FE_FLAG=1
export PATH=/usr/local/HiAI/runtime/ccec_compiler/bin:$PATH
if [ $1 -eq 0 ];
then
    export DUMP_GE_GRAPH=true
    export ME_DRAW_GRAPH=1
fi
@@ -1,120 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.nn import Dropout

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Net(Cell):
    def __init__(self, keep_prob, seed0, seed1, strategy=None):
        super(Net, self).__init__()
        self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy)

    def construct(self, input_):
        x = self.drop(input_)
        return x


# pylint: disable=comparison-with-itself
class DropoutFactory:
    def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None):
        size = 1
        prefix = ""
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(10, size)
        self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32)
        self.keep_prob = keep_prob
        self.seed0 = seed0
        self.seed1 = seed1
        self.strategy0 = strategy0
        need_dev_num = 1
        for s in strategy0[1]:
            need_dev_num = need_dev_num * s
        self.x_id = device_id % need_dev_num
        self.out_id = device_id % need_dev_num

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def d4_tensor_compare(self, input_, out_me):
        [a, b, c, d] = input_.shape
        for i in range(a):
            for j in range(b):
                for k in range(c):
                    for e in range(d):
                        if out_me[i, j, k, e] == 0:
                            assert True
                        else:
                            assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001)

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np)
        inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        net = Net(0.4, 0, 0, strategy=self.strategy0)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
        return out.asnumpy()

    def forward_cmp(self):
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1])
        self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel)


def test_reid_dropout_forward_seed_F32_64_512_8_8():
    fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1)))
    fact.forward_cmp()


def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat():
    fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1)))
    fact.forward_cmp()
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_dropout_parallel_4p.py>../../log/test_dropout_parallel_4p_log$i.log 2>&1 &
    cd ..
done
@@ -1,154 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class MatmulSingle(Cell):
    def __init__(self, transpose_a=False, transpose_b=False):
        super(MatmulSingle, self).__init__()
        self.matmul = P.MatMul(transpose_a, transpose_b)
        self.pow = P.Pow()
        self.reduce_sum = P.ReduceSum()

    def construct(self, x, y):
        out = self.matmul(x, y)
        out = self.pow(out, 2.0)
        out = self.reduce_sum(out, None)
        return out


class MatmulAllgather(Cell):
    def __init__(self, group, transpose_a=False, transpose_b=False):
        super(MatmulAllgather, self).__init__()
        self.allgather = P.AllGather(group=group)
        self.matmul = P.MatMul(transpose_a, transpose_b)
        self.pow = P.Pow()
        self.reduce_sum = P.ReduceSum()
        self.allreduce = P.AllReduce(group=group)

    def construct(self, x, y):
        x = self.allgather(x)
        out = self.matmul(x, y)
        out = self.pow(out, 2.0)
        out = self.reduce_sum(out, None)
        out = self.allreduce(out)
        return out


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, sens):
        return grad_all_with_sens(self.network)(x, y, sens)


class MatmulAllgatherFactory:
    def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra):
        self.inputx = self.gen_value(inputx_shape, 10)
        self.inputy = self.gen_value(inputy_shape, 20)
        self.x_stra = x_stra
        self.y_stra = y_stra
        stra_size = 1
        for s in x_stra:
            stra_size = stra_size * s
        self.stra_size = stra_size

    def gen_value(self, input_shape, delta):
        size = 1
        for s in input_shape:
            size = size * s
        number_range = min(100, size)
        input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
        return input_np

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def grad_mindspore_impl_single(self):
        x = Tensor(self.inputx)
        y = Tensor(self.inputy)
        sens = Tensor(1.0, dtype=ms.float32)
        net = MatmulSingle()
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, sens)
        return input_grad

    def grad_mindspore_impl_reduce(self):
        inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
        inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
        x = Tensor(inputxs[device_id % self.stra_size])
        y = Tensor(inputys[device_id % self.stra_size])
        repeat_num = device_num / self.stra_size
        v = self.stra_size * repeat_num * repeat_num * repeat_num
        sens = Tensor(1.0 / v, dtype=ms.float32)
        net = MatmulAllgather("hccl_world_group")
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, sens)
        return input_grad

    def grad_cmp(self):
        single_results = self.grad_mindspore_impl_single()
        reduce_results = self.grad_mindspore_impl_reduce()
        single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
        reduce_result0 = reduce_results[0].asnumpy()
        single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
        reduce_result1 = reduce_results[1].asnumpy()
        assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
        assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)


def test_reduce_grad():
    inputx_shape = (64, 32)
    inputy_shape = (32, 64)
    fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4))
    fact.grad_cmp()
@@ -1,175 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class MatmulSingle(Cell):
    def __init__(self, transpose_a=False, transpose_b=False):
        super(MatmulSingle, self).__init__()
        self.matmul1 = P.MatMul(transpose_a, transpose_b)
        self.matmul2 = P.MatMul(transpose_a, transpose_b)
        self.pow = P.Pow()
        self.reduce_sum = P.ReduceSum()

    def construct(self, x, y, z):
        out = self.matmul1(x, y)
        out = self.matmul2(out, z)
        out = self.pow(out, 2.0)
        out = self.reduce_sum(out, None)
        return out


class MatmulReduce(Cell):
    def __init__(self, group, transpose_a=False, transpose_b=False):
        super(MatmulReduce, self).__init__()
        self.matmul1 = P.MatMul(transpose_a, transpose_b)
        self.allreduce1 = P.AllReduce(group=group)
        self.matmul2 = P.MatMul(transpose_a, transpose_b)
        self.pow = P.Pow()
        self.reduce_sum = P.ReduceSum()
        self.allreduce2 = P.AllReduce(group=group)

    def construct(self, x, y, z):
        out = self.matmul1(x, y)
        out = self.allreduce1(out)
        out = self.matmul2(out, z)
        out = self.pow(out, 2.0)
        out = self.reduce_sum(out, None)
        out = self.allreduce2(out)
        return out


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, z, sens):
        return grad_all_with_sens(self.network)(x, y, z, sens)


class MatmulReduceFactory:
    def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra):
        self.inputx = self.gen_value(inputx_shape, 10)
        self.inputy = self.gen_value(inputy_shape, 20)
        self.inputz = self.gen_value(inputz_shape, 30)
        self.x_stra = x_stra
        self.y_stra = y_stra
        self.z_stra = z_stra
        stra_size = 1
        for s in x_stra:
            stra_size = stra_size * s
        self.stra_size = stra_size

    def gen_value(self, input_shape, delta):
        size = 1
        for s in input_shape:
            size = size * s
        number_range = min(100, size)
        input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32)
        return input_np

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def grad_mindspore_impl_single(self):
        x = Tensor(self.inputx)
        y = Tensor(self.inputy)
        z = Tensor(self.inputz)
        sens = Tensor(1.0, dtype=ms.float32)
        net = MatmulSingle()
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, z, sens)
        return input_grad

    def grad_mindspore_impl_reduce(self):
        inputxs = self.get_parallel_blocks(self.inputx, self.x_stra)
        inputys = self.get_parallel_blocks(self.inputy, self.y_stra)
        inputzs = self.get_parallel_blocks(self.inputz, self.z_stra)
        x = Tensor(inputxs[device_id % self.stra_size])
        y = Tensor(inputys[device_id % self.stra_size])
        z = Tensor(inputzs[device_id % self.stra_size])
        repeat_num = device_num / self.stra_size
        v = self.stra_size * repeat_num * repeat_num * repeat_num
        sens = Tensor(1.0 / v, dtype=ms.float32)
        net = MatmulReduce("hccl_world_group")
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, z, sens)
        return input_grad

    def grad_cmp(self):
        single_results = self.grad_mindspore_impl_single()
        reduce_results = self.grad_mindspore_impl_reduce()
        single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size]
        reduce_result0 = reduce_results[0].asnumpy()
        single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size]
        reduce_result1 = reduce_results[1].asnumpy()
        single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size]
        reduce_result2 = reduce_results[2].asnumpy()
        assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001)
        assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001)
        assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001)


def test_reduce_grad():
    inputx_shape = (32, 64)
    inputy_shape = (64, 64)
    inputz_shape = (64, 32)
    fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4))
    fact.grad_cmp()


def test_reduce_grad_repeat():
    inputx_shape = (32, 64)
    inputy_shape = (64, 64)
    inputz_shape = (64, 32)
    fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2))
    fact.grad_cmp()
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_allgather_4p.py>../../log/test_allgather_4p_log$i.log 2>&1 &
    cd ..
done
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_allreduce_4p.py>../../log/test_allreduce_4p_log$i.log 2>&1 &
    cd ..
done
@@ -1,206 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class L2normalize(Cell):
    def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None):
        super(L2normalize, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.l2norm = P.L2Normalize(axis, epsilon, strategy1)

    def construct(self, x, y):
        out = self.add(x, y)
        out = self.l2norm(out)
        return out


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class L2normalizeFactory:
    def __init__(self, input_shape, axis, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
            np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
            np.float32)
        target_shape = input_shape
        self.target_shape = target_shape
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(1000, target_size)
        self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
                                         target_shape).astype(np.float32)
        self.axis = axis
        self.epsilon = 1e-4
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        out_strategy = strategy1[1]
        self.out_strategy = out_strategy
        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        self.out_id = device_id % need_dev_num1

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        net = L2normalize(self.axis, self.epsilon)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        net = L2normalize(self.axis, self.epsilon)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                              parallel_inputs_run=[x1, y1, output_grad1])
        return input_grad

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_l2normalize_input_128_512():
    input_shape = (128, 512)
    axis = 0
    fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.forward_cmp()


def test_reid_l2normalize_grad_input_128_512():
    input_shape = (128, 512)
    axis = 0
    fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.grad_cmp()


def test_reid_l2normalize_input_128_512_repeat():
    input_shape = (128, 512)
    axis = 0
    fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
    fact.forward_cmp()


def test_reid_l2normalize_grad_input_128_512_repeat():
    input_shape = (128, 512)
    axis = 0
    fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2)))
    fact.grad_cmp()
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_l2normalize_parallel_4p.py>../../log/test_l2normalize_parallel_4p_log$i.log 2>&1 &
    cd ..
done
@@ -1 +0,0 @@
log files for auto parallel end to end test cases
@ -1,195 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
import mindspore.communication.management as distributedTool
|
||||
from mindspore import context
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import grad_all
|
||||
|
||||
device_num = 4
|
||||
device_id = int(os.environ["RANK_ID"])
|
||||
path = "./output/"
|
||||
|
||||
|
||||
def setup_module():
|
||||
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
|
||||
context.set_context(mode=context.GRAPH_MODE)
|
||||
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
|
||||
distributedTool.init()
|
||||
distributedTool.create_group("0-3", [0, 1, 2, 3])
|
||||
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
|
||||
|
||||
|
||||
def teardown_module():
|
||||
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
|
||||
|
||||
|
||||
class AddRelu(Cell):
|
||||
def __init__(self, strategy0=None, strategy1=None):
|
||||
super(AddRelu, self).__init__()
|
||||
self.add = P.TensorAdd(strategy=strategy0)
|
||||
self.relu = P.ReLU(strategy=strategy1)
|
||||
|
||||
def construct(self, x, y):
|
||||
out = self.add(x, y)
|
||||
out = self.relu(out)
|
||||
return out
|
||||
|
||||
|
||||
class NetWithLoss(Cell):
|
||||
def __init__(self, network, strategy2=None):
|
||||
super(NetWithLoss, self).__init__()
|
||||
self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2)
|
||||
self.network = network
|
||||
|
||||
def construct(self, x, y, b):
|
||||
predict = self.network(x, y)
|
||||
return self.loss(predict, b)[0]
|
||||
|
||||
|
||||
class Grad(Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.network = network
|
||||
|
||||
def construct(self, x, y, b):
|
||||
return grad_all(self.network)(x, y, b)
|
||||
|
||||
|
||||
class AddReluFactory:
|
||||
def __init__(self, input_shape, strategy0, strategy1, strategy2):
|
||||
prefix = ""
|
||||
size = 1
|
||||
for s in input_shape:
|
||||
prefix = prefix + str(s)
|
||||
size = size * s
|
||||
self.prefix = prefix
|
||||
number_range = min(1000, size)
|
||||
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
|
||||
np.float32)
|
||||
self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
|
||||
np.float32)
|
||||
target_shape = input_shape
|
||||
self.target_shape = target_shape
|
||||
target_size = 1
|
||||
for s in target_shape:
|
||||
target_size = target_size * s
|
||||
number_range = min(10, target_size)
|
||||
self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype(
|
||||
np.float32)
|
||||
self.strategy0 = strategy0
|
||||
self.strategy1 = strategy1
|
||||
self.strategy2 = strategy2
|
||||
out_strategy = strategy1[1]
|
||||
self.out_strategy = out_strategy
|
||||
need_dev_num0 = 1
|
||||
need_dev_num1 = 1
|
||||
for s in strategy0[1]:
|
||||
need_dev_num0 = need_dev_num0 * s
|
||||
for s in out_strategy:
|
||||
need_dev_num1 = need_dev_num1 * s
|
||||
self.x_id = device_id % need_dev_num0
|
||||
self.y_id = device_id % need_dev_num0
|
||||
self.out_id = device_id % need_dev_num1
|
||||
|
||||
def get_parallel_blocks(self, input_, strategy):
|
||||
blocks = [input_]
|
||||
i = 0
|
||||
for stra in strategy:
|
||||
temp = []
|
||||
while len(blocks) > 0:
|
||||
block = blocks.pop(0)
|
||||
temp.extend(np.split(block, stra, axis=i))
|
||||
blocks.extend(temp)
|
||||
i += 1
|
||||
return blocks
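# Roughly, get_parallel_blocks slices the full host-side array along axis 0
# into strategy[0] pieces, then each resulting piece along axis 1 into
# strategy[1] pieces, and so on, so the returned blocks follow the row-major
# order of the device layout. For illustration (values not taken from the
# tests below): a (4, 2) array with strategy (2, 2) would yield four (2, 1)
# blocks ordered top-left, top-right, bottom-left, bottom-right, and
# blocks[self.x_id] is the shard a given device would feed to the parallel net.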
|
||||
|
||||
def grad_mindspore_impl(self):
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2)
|
||||
output_grad = Tensor(self.output_grad_np)
|
||||
net = AddRelu()
|
||||
net_with_loss = NetWithLoss(net)
|
||||
grad_net = Grad(net_with_loss)
|
||||
grad_net.set_train()
|
||||
input_grads = []
|
||||
for i in range(0, 3):
|
||||
input_grad = grad_net(x, y, output_grad)
|
||||
input_grads.append(input_grad)
|
||||
return input_grads
|
||||
|
||||
def grad_mindspore_parallel_impl(self):
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2)
|
||||
output_grad = Tensor(self.output_grad_np)
|
||||
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
|
||||
inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
|
||||
outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
|
||||
x1 = Tensor(inputs_x[self.x_id])
|
||||
y1 = Tensor(inputs_y[self.y_id])
|
||||
output_grad1 = Tensor(outgrads[self.out_id])
|
||||
net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1)
|
||||
net_with_loss = NetWithLoss(net, strategy2=self.strategy2)
|
||||
grad_net = Grad(net_with_loss)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
grad_net.set_auto_parallel()
|
||||
grad_net.set_train()
|
||||
input_grads = []
|
||||
for i in range(0, 3):
|
||||
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
|
||||
parallel_inputs_run=[x1, y1, output_grad1])
|
||||
input_grads.append(input_grad)
|
||||
return input_grads
|
||||
|
||||
def grad_cmp(self):
|
||||
input_grad_mindspores = self.grad_mindspore_impl()
|
||||
input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl()
|
||||
for i in range(0, len(input_grad_mindspores)):
|
||||
input_grad_mindspore = input_grad_mindspores[i]
|
||||
input_grad_mindspore_parallel = input_grad_mindspore_parallels[i]
|
||||
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
|
||||
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
|
||||
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
|
||||
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
|
||||
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
|
||||
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
|
||||
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy",
|
||||
input_grad_blocks_0[self.x_id])
|
||||
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy",
|
||||
input_grad_blocks_1[self.y_id])
|
||||
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy",
|
||||
input_grad_mindspore_parallel0)
|
||||
np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy",
|
||||
input_grad_mindspore_parallel1)
|
||||
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
|
||||
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
def test_reid_l2normalize_grad_input_128_512():
|
||||
input_shape = (128, 512)
|
||||
fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)),
|
||||
strategy2=(0, (4, 1), (4, 1)))
|
||||
fact.grad_cmp()
def test_reid_l2normalize_grad_input_128_512_stridesplit():
|
||||
input_shape = (128, 512)
|
||||
fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)),
|
||||
strategy2=(0, (4, 1), (4, 1)))
|
||||
fact.grad_cmp()
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
for((i=0;i<4;i++));
|
||||
do
|
||||
rm -rf device$i
|
||||
mkdir device$i
|
||||
cd device$i
|
||||
mkdir output
|
||||
source ../../dist_env_4p.sh $i
|
||||
env >log$i.log
|
||||
pytest -s ../test_loss_parallel_4p.py>../../log/test_loss_parallel_4p_log$i.log 2>&1 &
|
||||
cd ..
|
||||
done
|
|
@ -1,329 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
from numpy import allclose
|
||||
|
||||
import mindspore.communication.management as distributedTool
|
||||
from mindspore import context
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import grad_all_with_sens
|
||||
|
||||
device_num = 4
|
||||
device_id = int(os.environ["RANK_ID"])
|
||||
path = "./output/"
|
||||
|
||||
|
||||
def setup_module():
|
||||
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
|
||||
context.set_context(mode=context.GRAPH_MODE)
|
||||
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
|
||||
distributedTool.init()
|
||||
distributedTool.create_group("0-3", [0, 1, 2, 3])
|
||||
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
|
||||
|
||||
|
||||
def teardown_module():
|
||||
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
|
||||
|
||||
|
||||
class Matmul(Cell):
|
||||
def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
|
||||
super(Matmul, self).__init__()
|
||||
self.add = P.TensorAdd(strategy=strategy1)
|
||||
self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0)
|
||||
|
||||
def construct(self, x, w, z):
|
||||
out = self.add(x, z)
|
||||
return self.matmul(out, w)
|
||||
|
||||
|
||||
class BatchMatMul(Cell):
|
||||
def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None):
|
||||
super(BatchMatMul, self).__init__()
|
||||
self.add = P.TensorAdd(strategy=strategy1)
|
||||
self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0)
|
||||
|
||||
def construct(self, x, w, z):
|
||||
out = self.add(x, z)
|
||||
return self.batchmatmul(out, w)
|
||||
|
||||
|
||||
class Grad(Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.network = network
|
||||
|
||||
def construct(self, inputa, inputb, inputz, output_grad):
|
||||
gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad)
|
||||
return gout
|
||||
|
||||
|
||||
class BatchmatmulFactory:
|
||||
def __init__(self, inputa_shape, inputb_shape, transpose_a, transpose_b, strategy, strategy_):
|
||||
self.strategy = strategy
|
||||
self.strategy_ = strategy_
|
||||
inputa_size = 1
|
||||
inputb_size = 1
|
||||
prefix = ""
|
||||
for s in inputa_shape:
|
||||
prefix = prefix + str(s) + "_"
|
||||
inputa_size = inputa_size * s
|
||||
prefix = prefix + "and"
|
||||
for s in inputb_shape:
|
||||
prefix = prefix + str(s) + "_"
|
||||
inputb_size = inputb_size * s
|
||||
number_rangea = min(1000, inputa_size)
|
||||
number_rangeb = min(1000, inputb_size)
|
||||
self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype(
|
||||
np.float32)
|
||||
self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype(
|
||||
np.float32)
|
||||
self.inputz = np.zeros(self.inputa.shape).astype(np.float32)
|
||||
self.transpose_a = transpose_a
|
||||
self.transpose_b = transpose_b
|
||||
|
||||
out_shape = []
|
||||
device_matrix = []
|
||||
out_strategy = []
|
||||
if transpose_a:
|
||||
temp = inputa_shape[-1]
|
||||
inputa_shape[-1] = inputa_shape[-2]
|
||||
inputa_shape[-2] = temp
|
||||
if transpose_b:
|
||||
temp = inputb_shape[-1]
|
||||
inputb_shape[-1] = inputb_shape[-2]
|
||||
inputb_shape[-2] = temp
|
||||
|
||||
if len(inputa_shape) >= len(inputb_shape):
|
||||
out_shape = list(inputa_shape)
|
||||
out_shape[-1] = inputb_shape[-1]
|
||||
else:
|
||||
out_shape = list(inputb_shape)
|
||||
out_shape[-2] = inputa_shape[-2]
|
||||
|
||||
strategy1 = list(self.strategy[1])
|
||||
strategy2 = list(self.strategy[2])
|
||||
if transpose_a:
|
||||
temp = strategy1[-1]
|
||||
strategy1[-1] = strategy1[-2]
|
||||
strategy1[-2] = temp
|
||||
if transpose_b:
|
||||
temp = strategy2[-1]
|
||||
strategy2[-1] = strategy2[-2]
|
||||
strategy2[-2] = temp
|
||||
|
||||
if len(strategy1) >= len(strategy2):
|
||||
out_strategy = strategy1.copy()
|
||||
out_strategy[-1] = strategy2[-1]
|
||||
else:
|
||||
out_strategy = strategy2.copy()
|
||||
out_strategy[-2] = strategy1[-2]
|
||||
device_matrix = out_strategy.copy()
|
||||
device_matrix.insert(-1, strategy1[-1])
|
||||
self.out_strategy = out_strategy
|
||||
|
||||
need_dev_num = 1
|
||||
for s in device_matrix:
|
||||
need_dev_num = need_dev_num * s
|
||||
self.need_dev_num = need_dev_num
|
||||
self.device_matrix = device_matrix
|
||||
|
||||
out_size = 1
|
||||
for s in out_shape:
|
||||
out_size = out_size * s
|
||||
number_range = min(1000, out_size)
|
||||
self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype(
|
||||
np.float32)
|
||||
|
||||
device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix)
|
||||
x_index = device_index[:-1].copy()
|
||||
if transpose_a:
|
||||
temp = x_index[-1]
|
||||
x_index[-1] = x_index[-2]
|
||||
x_index[-2] = temp
|
||||
y_index = device_index[:-3].copy()
|
||||
y_index.append(device_index[-2])
|
||||
y_index.append(device_index[-1])
|
||||
if transpose_b:
|
||||
temp = y_index[-1]
|
||||
y_index[-1] = y_index[-2]
|
||||
y_index[-2] = temp
|
||||
|
||||
out_index = device_index[:-2].copy()
|
||||
out_index.append(device_index[-1])
|
||||
|
||||
print(device_matrix)
|
||||
print(device_index)
|
||||
|
||||
need_dev_num_ = 1
|
||||
for s in strategy_[1]:
|
||||
need_dev_num_ = need_dev_num_ * s
|
||||
self.x_id = device_id % need_dev_num_
|
||||
self.y_id = self.list_to_id(y_index, self.strategy[2])
|
||||
self.out_id = self.list_to_id(out_index, self.out_strategy)
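# For illustration, tracing the test strategy (0, (2, 2), (1, 2)) with
# transpose_b=True through the code above: strategy1 = [2, 2], strategy2
# becomes [2, 1] after the swap, so out_strategy = [2, 1] and
# device_matrix = [2, 2, 1]; all four devices are needed, and rank 3 maps to
# device_index = [1, 1, 0] in that matrix, from which x_id, y_id and out_id
# are then derived.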
|
||||
|
||||
def get_parallel_blocks(self, input_, strategy):
|
||||
blocks = [input_]
|
||||
i = 0
|
||||
for stra in strategy:
|
||||
temp = []
|
||||
while len(blocks) > 0:
|
||||
block = blocks.pop(0)
|
||||
temp.extend(np.split(block, stra, axis=i))
|
||||
blocks.extend(temp)
|
||||
i += 1
|
||||
return blocks
|
||||
|
||||
|
||||
def id_to_list(self, id_, shape):
|
||||
"""
|
||||
shape: the upper bound of each dimension, e.g. (2, 4, 8)
|
||||
"""
|
||||
result = []
|
||||
r = id_
|
||||
for i in range(0, len(shape)):
|
||||
v = 1
|
||||
for j in range(i + 1, len(shape)):
|
||||
v = v * shape[j]
|
||||
result.append(r // v)
|
||||
r = r % v
|
||||
return result
|
||||
|
||||
def list_to_id(self, id_list, shape):
|
||||
result = 0
|
||||
for i in range(0, len(id_list)):
|
||||
v = 1
|
||||
for j in range(i + 1, len(id_list)):
|
||||
v = v * shape[j]
|
||||
result = result + id_list[i] * v
|
||||
return result
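# For illustration: id_to_list(13, (2, 4, 8)) walks the strides 32, 8, 1 and
# returns [0, 1, 5] (13 = 0*32 + 1*8 + 5), while list_to_id([0, 1, 5],
# (2, 4, 8)) maps the coordinate back to 13; the two helpers convert a flat
# rank id to and from its coordinate in the device matrix.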
|
||||
|
||||
def forward_mindspore_impl(self):
|
||||
if len(self.inputa.shape) > 2:
|
||||
matmul = BatchMatMul(self.transpose_a, self.transpose_b)
|
||||
else:
|
||||
matmul = Matmul(self.transpose_a, self.transpose_b)
|
||||
matmul.set_train()
|
||||
out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz))
|
||||
return out_me.asnumpy()
|
||||
|
||||
def forward_mindspore_parallel_impl(self):
|
||||
if len(self.inputa.shape) > 2:
|
||||
matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
|
||||
else:
|
||||
matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
x = Tensor(self.inputa)
|
||||
y = Tensor(self.inputb)
|
||||
z = Tensor(self.inputz)
|
||||
xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
|
||||
ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
|
||||
zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
|
||||
x1 = Tensor(xs[self.x_id])
|
||||
y1 = Tensor(ys[self.y_id])  # needs to be derived from the device matrix
|
||||
z1 = Tensor(zs[self.x_id])
|
||||
matmul.set_train()
|
||||
matmul.set_auto_parallel()
|
||||
out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1])
|
||||
return out_me.asnumpy()
|
||||
|
||||
def grad_mindspore_impl(self):
|
||||
x = Tensor(self.inputa)
|
||||
y = Tensor(self.inputb)
|
||||
z = Tensor(self.inputz)
|
||||
if len(self.inputa.shape) > 2:
|
||||
matmul = BatchMatMul(self.transpose_a, self.transpose_b)
|
||||
else:
|
||||
matmul = Matmul(self.transpose_a, self.transpose_b)
|
||||
net_me = Grad(matmul)
|
||||
net_me.set_train()
|
||||
out_grad_me = Tensor(self.output_grad_np)
|
||||
out_grad = net_me(x, y, z, out_grad_me)
|
||||
return out_grad
|
||||
|
||||
def grad_mindspore_parallel_impl(self):
|
||||
if len(self.inputa.shape) > 2:
|
||||
matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
|
||||
else:
|
||||
matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_)
|
||||
x = Tensor(self.inputa)
|
||||
y = Tensor(self.inputb)
|
||||
z = Tensor(self.inputz)
|
||||
out_grad_me = Tensor(self.output_grad_np)
|
||||
|
||||
xs = self.get_parallel_blocks(self.inputa, self.strategy_[1])
|
||||
ys = self.get_parallel_blocks(self.inputb, self.strategy[2])
|
||||
zs = self.get_parallel_blocks(self.inputz, self.strategy_[1])
|
||||
out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
|
||||
|
||||
x1 = Tensor(xs[self.x_id])  # needs to be derived from the device matrix
|
||||
y1 = Tensor(ys[self.y_id])
|
||||
z1 = Tensor(zs[self.x_id])
|
||||
out_grad1 = Tensor(out_grads[self.out_id])
|
||||
net_me = Grad(matmul)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
net_me.set_auto_parallel()
|
||||
net_me.set_train()
|
||||
|
||||
out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1],
|
||||
parallel_inputs_run=[x1, y1, z1, out_grad1])
|
||||
return out_grad
|
||||
|
||||
def forward_cmp(self):
|
||||
out_mindspore = self.forward_mindspore_impl()
|
||||
out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy)
|
||||
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
|
||||
assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
|
||||
|
||||
def grad_cmp(self):
|
||||
input_grad_mindspore = self.grad_mindspore_impl()
|
||||
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
|
||||
input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1])
|
||||
input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2])
|
||||
input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1])
|
||||
assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001)
|
||||
assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001)
|
||||
assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_reid_batchmatmul_inputa_128_512_inputb_2000_512():
|
||||
inputa = [128, 512]
|
||||
inputb = [2000, 512]
|
||||
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512():
|
||||
inputa = [128, 512]
|
||||
inputb = [2000, 512]
|
||||
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2)))
|
||||
fact.grad_cmp()
|
||||
|
||||
|
||||
def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution():
|
||||
inputa = [128, 512]
|
||||
inputb = [2000, 512]
|
||||
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution():
|
||||
inputa = [128, 512]
|
||||
inputb = [2000, 512]
|
||||
fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2)))
|
||||
fact.grad_cmp()
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
for((i=0;i<4;i++));
|
||||
do
|
||||
rm -rf device$i
|
||||
mkdir device$i
|
||||
cd device$i
|
||||
mkdir output
|
||||
source ../../dist_env_4p.sh $i
|
||||
env >log$i.log
|
||||
pytest -s ../test_matmul_parallel_4p.py >../../log/test_matmul_parallel_4p_log$i.log 2>&1 &
|
||||
cd ..
|
||||
done
@ -1,213 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
import mindspore.communication.management as distributedTool
|
||||
from mindspore import context
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import grad_all_with_sens
|
||||
|
||||
device_num = 4
|
||||
device_id = int(os.environ["RANK_ID"])
|
||||
path = "./output/"
|
||||
|
||||
|
||||
def setup_module():
|
||||
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
|
||||
context.set_context(mode=context.GRAPH_MODE)
|
||||
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
|
||||
distributedTool.init()
|
||||
distributedTool.create_group("0-3", [0, 1, 2, 3])
|
||||
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
|
||||
|
||||
|
||||
def teardown_module():
|
||||
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
|
||||
|
||||
|
||||
class Grad(Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.network = network
|
||||
|
||||
def construct(self, input1, input2, output_grad):
|
||||
return grad_all_with_sens(self.network)(input1, input2, output_grad)
|
||||
|
||||
|
||||
class Max(Cell):
|
||||
def __init__(self, axis, keep_dims, strategy0=None, strategy1=None):
|
||||
super(Max, self).__init__()
|
||||
self.add = P.TensorAdd(strategy=strategy0)
|
||||
self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1)
|
||||
self.axis = axis
|
||||
|
||||
def construct(self, input1, input2):
|
||||
out = self.add(input1, input2)
|
||||
return self.reduce_max(out, self.axis)
|
||||
|
||||
|
||||
class MaxFactory:
|
||||
def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1):
|
||||
self.strategy0 = strategy0
|
||||
self.strategy1 = strategy1
|
||||
self.axis = axis
|
||||
self.keep_dims = keep_dims
|
||||
input_size = 1
|
||||
prefix = ""
|
||||
for s in input_shape:
|
||||
prefix = prefix + str(s) + "_"
|
||||
input_size = input_size * s
|
||||
number_range = min(1000, input_size)
|
||||
self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype(
|
||||
np.float32)
|
||||
self.input_np2 = self.input_np1.copy()
|
||||
self.out_grad_np = None
|
||||
out_shape = list(input_shape)
|
||||
out_shape.pop(axis)
|
||||
out_size = input_size // input_shape[axis]  # integer division keeps out_size an int for np.arange below
|
||||
number_range_ = min(1000, out_size)
|
||||
self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype(
|
||||
np.float32)
|
||||
out_strategy = list(strategy1[1])
|
||||
out_strategy.pop(axis)
|
||||
self.out_strategy = out_strategy
|
||||
need_dev_num = 1
|
||||
need_dev_num_ = 1
|
||||
for s in strategy0[1]:
|
||||
need_dev_num = need_dev_num * s
|
||||
for s in out_strategy:
|
||||
need_dev_num_ = need_dev_num_ * s
|
||||
self.x_id = device_id % need_dev_num
|
||||
self.y_id = device_id % need_dev_num
|
||||
self.out_id = device_id % need_dev_num_
|
||||
|
||||
def get_parallel_blocks(self, input_, strategy):
|
||||
blocks = [input_]
|
||||
i = 0
|
||||
for stra in strategy:
|
||||
temp = []
|
||||
while len(blocks) > 0:
|
||||
block = blocks.pop(0)
|
||||
temp.extend(np.split(block, stra, axis=i))
|
||||
blocks.extend(temp)
|
||||
i += 1
|
||||
return blocks
|
||||
|
||||
def forward_mindspore_impl(self):
|
||||
input1 = Tensor(self.input_np1)
|
||||
input2 = Tensor(self.input_np2)
|
||||
net = Max(axis=self.axis, keep_dims=self.keep_dims)
|
||||
out = net(input1, input2)
|
||||
return out.asnumpy()
|
||||
|
||||
def forward_mindspore_parallel_impl(self):
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2)
|
||||
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
|
||||
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
|
||||
x1 = Tensor(xs[self.x_id])
|
||||
y1 = Tensor(ys[self.y_id])
|
||||
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
net.set_auto_parallel()
|
||||
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
|
||||
return out.asnumpy()
|
||||
|
||||
def grad_mindspore_impl(self):
|
||||
input1 = Tensor(self.input_np1)
|
||||
input2 = Tensor(self.input_np2)
|
||||
out_grad = Tensor(self.out_grad_np)
|
||||
net = Max(axis=self.axis, keep_dims=self.keep_dims)
|
||||
grad_net = Grad(net)
|
||||
grad_net.set_train()
|
||||
input_grad = grad_net(input1, input2, out_grad)
|
||||
return input_grad
|
||||
|
||||
def grad_mindspore_parallel_impl(self):
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2)
|
||||
output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy)
|
||||
out_grad = Tensor(output_grads[self.out_id])
|
||||
xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
|
||||
ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
|
||||
x1 = Tensor(xs[self.x_id])
|
||||
y1 = Tensor(ys[self.y_id])
|
||||
net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1)
|
||||
grad_net = Grad(net)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
grad_net.set_auto_parallel()
|
||||
grad_net.set_train()
|
||||
input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad],
|
||||
parallel_inputs_run=[x1, y1, out_grad])
|
||||
return input_grad
|
||||
|
||||
def forward_cmp(self):
|
||||
out_mindspore = self.forward_mindspore_impl()
|
||||
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
|
||||
print(out_mindspore)
|
||||
print(out_mindspore_parallel)
|
||||
out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
|
||||
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001)
|
||||
|
||||
def grad_cmp(self):
|
||||
input_grad_mindspore = self.grad_mindspore_impl()
|
||||
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
|
||||
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
|
||||
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
|
||||
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
|
||||
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
|
||||
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
|
||||
input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
|
||||
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
|
||||
assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_reid_max_forward_input_256_64():
|
||||
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
|
||||
strategy1=(0, (4, 1)))
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
def test_reid_max_grad_input_256_64():
|
||||
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)),
|
||||
strategy1=(0, (4, 1)))
|
||||
fact.grad_cmp()
|
||||
|
||||
|
||||
def test_reid_max_forward_input_128_64_32_32():
|
||||
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
|
||||
strategy1=(0, (2, 1, 2, 1)))
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
def test_reid_max_grad_input_128_64_32_32():
|
||||
fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)),
|
||||
strategy1=(0, (2, 1, 2, 1)))
|
||||
fact.grad_cmp()
|
||||
|
||||
|
||||
def test_reid_max_forward_input_256_64_repeat():
|
||||
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
|
||||
strategy1=(0, (2, 1)))
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
def test_reid_max_grad_input_256_64_repeat():
|
||||
fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)),
|
||||
strategy1=(0, (2, 1)))
|
||||
fact.grad_cmp()
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
for((i=0;i<4;i++));
|
||||
do
|
||||
rm -rf device$i
|
||||
mkdir device$i
|
||||
cd device$i
|
||||
mkdir output
|
||||
source ../../dist_env_4p.sh $i
|
||||
env >log$i.log
|
||||
pytest -s ../test_max_parallel_4p.py>../../log/test_max_parallel_4p_log$i.log 2>&1 &
|
||||
cd ..
|
||||
done
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
for((i=0;i<4;i++));
|
||||
do
|
||||
rm -rf device$i
|
||||
mkdir device$i
|
||||
cd device$i
|
||||
mkdir output
|
||||
source ../../dist_env_4p.sh $i
|
||||
env >log$i.log
|
||||
pytest -s ../test_mul_softmax_parallel_4p.py>../../log/test_mul_softmax_parallel_4p_log$i.log 2>&1 &
|
||||
cd ..
|
||||
done
@ -1,200 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore as ms
|
||||
import mindspore.communication.management as distributedTool
|
||||
from mindspore import context
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import grad_all_with_sens
|
||||
|
||||
device_num = 4
|
||||
device_id = int(os.environ["RANK_ID"])
|
||||
path = "./output/"
|
||||
|
||||
|
||||
def setup_module():
|
||||
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
|
||||
context.set_context(mode=context.GRAPH_MODE)
|
||||
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
|
||||
distributedTool.init()
|
||||
distributedTool.create_group("0-3", [0, 1, 2, 3])
|
||||
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
|
||||
|
||||
|
||||
def teardown_module():
|
||||
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
|
||||
|
||||
|
||||
class MulSoftmax(Cell):
|
||||
def __init__(self, strategy0=None, strategy1=None, axis=0):
|
||||
super(MulSoftmax, self).__init__()
|
||||
self.mul = P.Mul(strategy=strategy0)
|
||||
self.softmax = P.Softmax(axis=axis, strategy=strategy1)
|
||||
|
||||
def construct(self, x, z):
|
||||
out = self.mul(x, z)
|
||||
return self.softmax(out)
|
||||
|
||||
|
||||
class Grad(Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.network = network
|
||||
|
||||
def construct(self, x, y, output_grad):
|
||||
return grad_all_with_sens(self.network)(x, y, output_grad)
|
||||
|
||||
|
||||
class MulSoftmaxFactory:
|
||||
def __init__(self, input_shape, strategy0, strategy1):
|
||||
prefix = ""
|
||||
size = 1
|
||||
for s in input_shape:
|
||||
prefix = prefix + str(s)
|
||||
size = size * s
|
||||
self.prefix = prefix
|
||||
number_range = min(1000, size)
|
||||
self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
|
||||
np.float32)
|
||||
self.input_np2 = 1.0
|
||||
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
|
||||
input_shape).astype(np.float32)
|
||||
self.strategy0 = strategy0
|
||||
self.strategy1 = strategy1
|
||||
need_dev_num = 1
|
||||
need_dev_num_ = 1
|
||||
for s in strategy0[1]:
|
||||
need_dev_num = need_dev_num * s
|
||||
for s in strategy1[1]:
|
||||
need_dev_num_ = need_dev_num_ * s
|
||||
self.x_id = device_id % need_dev_num
|
||||
self.y_id = device_id % need_dev_num
|
||||
self.out_id = device_id % need_dev_num_
|
||||
|
||||
def forward_mindspore_impl(self):
|
||||
net = MulSoftmax()
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2, ms.float32)
|
||||
out = net(x, y)
|
||||
return out.asnumpy()
|
||||
|
||||
def forward_mindspore_parallel_impl(self):
|
||||
net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
net.set_auto_parallel()
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2, ms.float32)
|
||||
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
|
||||
x1 = Tensor(inputs_x[self.x_id])
|
||||
y1 = Tensor(self.input_np2, ms.float32)
|
||||
out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
|
||||
return out.asnumpy()
|
||||
|
||||
def grad_mindspore_impl(self):
|
||||
output_grad = Tensor(self.output_grad_np)
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2, ms.float32)
|
||||
net = MulSoftmax()
|
||||
grad_net = Grad(net)
|
||||
grad_net.set_train()
|
||||
input_grad = grad_net(x, y, output_grad)
|
||||
return input_grad
|
||||
|
||||
def grad_mindspore_parallel_impl(self):
|
||||
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1])
|
||||
output_grad = Tensor(output_grads[self.out_id])
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2, ms.float32)
|
||||
net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1)
|
||||
grad_net = Grad(net)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
grad_net.set_train()
|
||||
grad_net.set_auto_parallel()
|
||||
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
|
||||
x1 = Tensor(inputs_x[self.x_id])
|
||||
y1 = Tensor(self.input_np2, ms.float32)
|
||||
input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad],
|
||||
parallel_inputs_run=[x1, y1, output_grad])
|
||||
return input_grad
|
||||
|
||||
def get_parallel_blocks(self, input_, strategy):
|
||||
blocks = [input_]
|
||||
i = 0
|
||||
for stra in strategy:
|
||||
temp = []
|
||||
while len(blocks) > 0:
|
||||
block = blocks.pop(0)
|
||||
temp.extend(np.split(block, stra, axis=i))
|
||||
blocks.extend(temp)
|
||||
i += 1
|
||||
return blocks
|
||||
|
||||
def forward_cmp(self):
|
||||
out_mindspore = self.forward_mindspore_impl()
|
||||
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
|
||||
np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel)
|
||||
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1])
|
||||
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)
|
||||
|
||||
def grad_cmp(self):
|
||||
input_grad_mindspore = self.grad_mindspore_impl()
|
||||
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
|
||||
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
|
||||
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
|
||||
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
|
||||
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
|
||||
np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0)
|
||||
np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1)
|
||||
input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0,
|
||||
self.strategy0[1])  # of TensorMul's two inputs, X1 is not broadcast here while X2 is broadcast
|
||||
assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
|
||||
assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001)
|
||||
|
||||
|
||||
@pytest.mark.reid_forward
|
||||
def test_reid_mul_softmax_input_128x64():
|
||||
stra0 = (0, (1, 4), ())
|
||||
stra1 = (0, (1, 4))
|
||||
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
@pytest.mark.reid_grad
|
||||
def test_reid_grad_mul_softmax_input_128x64():
|
||||
stra0 = (0, (1, 4), ())
|
||||
stra1 = (0, (1, 4))
|
||||
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
|
||||
fact.grad_cmp()
|
||||
|
||||
|
||||
@pytest.mark.reid_forward
|
||||
def test_reid_mul_softmax_input_128x64_all_to_all():
|
||||
stra0 = (0, (4, 1), ())
|
||||
stra1 = (0, (1, 4))
|
||||
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
@pytest.mark.reid_grad
|
||||
def test_reid_grad_mul_softmax_input_128x64_all_to_all():
|
||||
stra0 = (0, (4, 1), ())
|
||||
stra1 = (0, (1, 4))
|
||||
fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1)
|
||||
fact.grad_cmp()
@ -1,147 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
import mindspore as ms
|
||||
import mindspore.communication.management as distributedTool
|
||||
from mindspore import context
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
device_num = 4
|
||||
device_id = int(os.environ["RANK_ID"])
|
||||
path = "./output/"
|
||||
|
||||
|
||||
def setup_module():
|
||||
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
|
||||
context.set_context(mode=context.GRAPH_MODE)
|
||||
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
|
||||
distributedTool.init()
|
||||
distributedTool.create_group("0-3", [0, 1, 2, 3])
|
||||
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
|
||||
|
||||
|
||||
def teardown_module():
|
||||
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
|
||||
|
||||
|
||||
class Onehot(Cell):
|
||||
def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None):
|
||||
super(Onehot, self).__init__()
|
||||
self.onehot = P.OneHot(axis, strategy=strategy)
|
||||
self.depth = depth
|
||||
self.on_value = Tensor(on_value, ms.float32)
|
||||
self.off_value = Tensor(off_value, ms.float32)
|
||||
|
||||
def construct(self, indices):
|
||||
return self.onehot(indices, self.depth, self.on_value, self.off_value)
|
||||
|
||||
|
||||
class OneHotFactory:
|
||||
def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None):
|
||||
size = 1
|
||||
prefix = ""
|
||||
for s in input_shape:
|
||||
prefix = prefix + str(s)
|
||||
size = size * s
|
||||
self.prefix = prefix
|
||||
number_range = min(10, size)
|
||||
self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32)
|
||||
self.depth = depth
|
||||
self.on_value = on_value
|
||||
self.off_value = off_value
|
||||
self.axis = axis
|
||||
self.dtype = dtype
|
||||
self.strategy0 = strategy0
|
||||
need_dev_num = 1
|
||||
for s in strategy0[1]:
|
||||
need_dev_num = need_dev_num * s
|
||||
self.x_id = device_id % need_dev_num
|
||||
self.out_id = device_id % need_dev_num
|
||||
|
||||
def get_parallel_blocks(self, input_, strategy):
|
||||
blocks = [input_]
|
||||
i = 0
|
||||
for stra in strategy:
|
||||
temp = []
|
||||
while len(blocks) > 0:
|
||||
block = blocks.pop(0)
|
||||
temp.extend(np.split(block, stra, axis=i))
|
||||
blocks.extend(temp)
|
||||
i += 1
|
||||
return blocks
|
||||
|
||||
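# Note: grad_mindspore_impl below appears to be an unused leftover from the
# AddRelu test case; it references input_np1, input_np2, output_grad_np,
# AddRelu and Grad, none of which are defined in this file, and no test in
# this file calls it.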
def grad_mindspore_impl(self):
|
||||
output_grad = Tensor(self.output_grad_np)
|
||||
x = Tensor(self.input_np1)
|
||||
y = Tensor(self.input_np2, ms.float32)
|
||||
net = AddRelu()
|
||||
grad_net = Grad(net)
|
||||
grad_net.set_train()
|
||||
input_grad = grad_net(x, y, output_grad)
|
||||
return input_grad
|
||||
|
||||
def forward_mindspore_impl(self):
|
||||
indices = Tensor(self.input_np)
|
||||
net = Onehot(axis=self.axis,
|
||||
depth=self.depth,
|
||||
on_value=self.on_value,
|
||||
off_value=self.off_value)
|
||||
out = net(indices)
|
||||
return out.asnumpy()
|
||||
|
||||
def forward_mindspore_parallel_impl(self):
|
||||
x = Tensor(self.input_np)
|
||||
inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1])
|
||||
x1 = Tensor(inputs_x[self.x_id])
|
||||
net = Onehot(axis=self.axis,
|
||||
depth=self.depth,
|
||||
on_value=self.on_value,
|
||||
off_value=self.off_value, strategy=self.strategy0)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
net.set_auto_parallel()
|
||||
out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1])
|
||||
return out.asnumpy()
|
||||
|
||||
def forward_cmp(self):
|
||||
out_mindspore = self.forward_mindspore_impl()
|
||||
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
|
||||
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1])
|
||||
assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_reid_onehot_forward_int32_128_depth13000():
|
||||
fact = OneHotFactory(input_shape=(128,),
|
||||
depth=131072,
|
||||
on_value=1.000000,
|
||||
off_value=0.000000,
|
||||
axis=-1,
|
||||
dtype="float32",
|
||||
strategy0=(0, (2,)))
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
def test_reid_onehot_forward_int32_131072_depth127():
|
||||
fact = OneHotFactory(input_shape=(131072,),
|
||||
depth=127,
|
||||
on_value=1.000000,
|
||||
off_value=0.000000,
|
||||
axis=-1,
|
||||
dtype="float32",
|
||||
strategy0=(0, (4,)))
|
||||
fact.forward_cmp()
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
for((i=0;i<4;i++));
|
||||
do
|
||||
rm -rf device$i
|
||||
mkdir device$i
|
||||
cd device$i
|
||||
mkdir output
|
||||
source ../../dist_env_4p.sh $i
|
||||
env >log$i.log
|
||||
pytest -s ../test_onehot_parallel_4p.py>../../log/test_onehot_parallel_4p_log$i.log 2>&1 &
|
||||
cd ..
|
||||
done
@ -1,206 +0,0 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore as ms
|
||||
import mindspore.communication.management as distributedTool
|
||||
from mindspore import context
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import grad_all_with_sens
|
||||
|
||||
device_num = 4
|
||||
device_id = int(os.environ["RANK_ID"])
|
||||
path = "./output/"
|
||||
|
||||
|
||||
def setup_module():
|
||||
print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
|
||||
context.set_context(mode=context.GRAPH_MODE)
|
||||
context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
|
||||
distributedTool.init()
|
||||
distributedTool.create_group("0-3", [0, 1, 2, 3])
|
||||
print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")
|
||||
|
||||
|
||||
def teardown_module():
|
||||
print("~~~~~~~~~~~~tear down~~~~~~~~~~")
|
||||
|
||||
|
||||
class PReLU(Cell):
|
||||
def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None):
|
||||
super(PReLU, self).__init__()
|
||||
self.add = P.TensorAdd(strategy=strategy1_)
|
||||
self.prelu = P.PReLU(strategy=strategy_)
|
||||
self.channel = channel
|
||||
|
||||
def construct(self, x, z, w):
|
||||
out = self.add(x, z)
|
||||
return self.prelu(out, w)
|
||||
|
||||
|
||||
class Grad(Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.network = network
|
||||
|
||||
def construct(self, input_, z, w, output_grad):
|
||||
return grad_all_with_sens(self.network)(input_, z, w, output_grad)
|
||||
|
||||
|
||||
class PReLUFactory:
|
||||
def __init__(self, input_shape, strategy):
|
||||
n, c = input_shape[:2]
|
||||
prefix = ""
|
||||
size = 1
|
||||
for s in input_shape:
|
||||
prefix = prefix + str(s)
|
||||
size = size * s
|
||||
self.prefix = prefix
|
||||
number_range = min(1000, size)
|
||||
self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32)
|
||||
self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1,
|
||||
input_shape).astype(np.float32)
|
||||
self.channel = c
|
||||
self.weight = np.array([np.float32(0.25)] * c)
|
||||
self.strategy = strategy
|
||||
|
||||
def forward_mindspore_impl(self):
|
||||
net = PReLU(channel=self.channel, w=self.weight)
|
||||
x = Tensor(self.input_np)
|
||||
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
|
||||
w = Tensor(self.weight)
|
||||
out = net(x, z, w)
|
||||
return out.asnumpy()
|
||||
|
||||
def forward_mindspore_parallel_impl(self):
|
||||
net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
|
||||
strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
net.set_auto_parallel()
|
||||
x = Tensor(self.input_np)
|
||||
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
|
||||
w = Tensor(self.weight)
|
||||
|
||||
inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
|
||||
block_id = device_id % len(inputs)
|
||||
x1 = Tensor(inputs[block_id])
|
||||
z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
|
||||
w1 = Tensor(self.weight)
|
||||
|
||||
out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1])
|
||||
return out.asnumpy()
|
||||
|
||||
def grad_mindspore_impl(self):
|
||||
output_grad = Tensor(self.output_grad_np)
|
||||
x = Tensor(self.input_np)
|
||||
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
|
||||
w = Tensor(self.weight)
|
||||
|
||||
net = PReLU(channel=self.channel, w=self.weight)
|
||||
grad_net = Grad(net)
|
||||
grad_net.set_train()
|
||||
input_grad = grad_net(x, z, w, output_grad)
|
||||
return input_grad
|
||||
|
||||
def grad_mindspore_parallel_impl(self):
|
||||
output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1])
|
||||
block_id = device_id % len(output_grads)
|
||||
output_grad = Tensor(output_grads[block_id])
|
||||
x = Tensor(self.input_np)
|
||||
z = Tensor(np.zeros(self.input_np.shape), ms.float32)
|
||||
w = Tensor(self.weight)
|
||||
|
||||
net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy,
|
||||
strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1]))
|
||||
grad_net = Grad(net)
|
||||
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
|
||||
grad_net.set_auto_parallel()
|
||||
|
||||
grad_net.set_train()
|
||||
inputs = self.get_parallel_blocks(self.input_np, self.strategy[1])
|
||||
x1 = Tensor(inputs[block_id])
|
||||
z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32)
|
||||
w1 = Tensor(self.weight)
|
||||
|
||||
input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad],
|
||||
parallel_inputs_run=[x1, z1, w1, output_grad])
|
||||
return input_grad
|
||||
|
||||
def get_parallel_blocks(self, input_, strategy):
|
||||
blocks = [input_]
|
||||
i = 0
|
||||
for stra in strategy:
|
||||
temp = []
|
||||
while len(blocks) > 0:
|
||||
block = blocks.pop(0)
|
||||
temp.extend(np.split(block, stra, axis=i))
|
||||
blocks.extend(temp)
|
||||
i += 1
|
||||
return blocks
|
||||
|
||||
def forward_cmp(self):
|
||||
out_mindspore = self.forward_mindspore_impl()
|
||||
out_mindspore_parallel = self.forward_mindspore_parallel_impl()
|
||||
out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1])
|
||||
block_id = device_id % len(out_blocks)
|
||||
assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001)
|
||||
|
||||
def grad_cmp(self):
|
||||
input_grad_mindspore = self.grad_mindspore_impl()
|
||||
input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
|
||||
input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
|
||||
input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
|
||||
input_grad_mindspore2 = input_grad_mindspore[2].asnumpy()
|
||||
input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
|
||||
input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
|
||||
input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy()
|
||||
input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1])
|
||||
input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1])
|
||||
block_id = device_id % len(input_grad_blocks)
|
||||
assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
|
||||
assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001)
|
||||
assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)
|
||||
|
||||
|
||||
@pytest.mark.reid_grad
|
||||
def test_reid_prelu_input_128x64x112x112_repeat():
|
||||
stra = (0, (1, 1, 2, 1), (1,))
|
||||
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
@pytest.mark.reid_grad
|
||||
def test_reid_grad_prelu_input_128x64x112x112_repeat():
|
||||
stra = (0, (1, 1, 2, 1), (1,))
|
||||
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
|
||||
fact.grad_cmp()
|
||||
|
||||
|
||||
@pytest.mark.reid_grad
|
||||
def test_reid_prelu_input_128x64x112x112_mix():
|
||||
stra = (0, (2, 1, 1, 2), (1,))
|
||||
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
|
||||
fact.forward_cmp()
|
||||
|
||||
|
||||
@pytest.mark.reid_grad
|
||||
def test_reid_grad_prelu_input_128x64x112x112_mix():
|
||||
stra = (0, (2, 1, 1, 2), (1,))
|
||||
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra)
|
||||
fact.grad_cmp()
@ -1,27 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
for((i=0;i<4;i++));
|
||||
do
|
||||
rm -rf device$i
|
||||
mkdir device$i
|
||||
cd device$i
|
||||
mkdir output
|
||||
source ../../dist_env_4p.sh $i
|
||||
env >log$i.log
|
||||
pytest -s ../test_prelu_parallel_4p.py >../../log/test_prelu_parallel_4p_log$i.log 2>&1 &
|
||||
cd ..
|
||||
done
|
|
@@ -1,252 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
from numpy import allclose as allclose_nparray

import mindspore as ms
import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class GradScalar(Cell):
    def __init__(self, network):
        super(GradScalar, self).__init__()
        self.network = network
        self.sens = Tensor([1.0], dtype=ms.float32)

    def construct(self, x, y):
        return grad_all_with_sens(self.network)(x, y, self.sens)


class ReduceMean(Cell):
    def __init__(self, keep_dims, axis, strategy0=None, strategy1=None):
        super(ReduceMean, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1)
        self.axis = axis

    def construct(self, x, y):
        out = self.add(x, y)
        return self.reduce_mean(out, self.axis)


class ReduceMeanFactory:
    def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
            np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
            np.float32)
        self.keep_dims = keep_dims
        self.axis = axis
        target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(1000, target_size)
        self.output_grad_np = np.array([1.0], dtype=np.float32)
        if len(target_shape) > 0:
            self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype(
                np.float32) + 1.0
        self.shape = target_shape
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        out_strategy = []
        axis_ = list(axis)
        if axis_[0] == -1:
            axis_[0] = len(input_shape) - 1
        for i in range(0, len(input_shape)):
            if i in axis_:
                if keep_dims:
                    out_strategy.append(1)
            else:
                out_strategy.append(strategy1[1][i])
        self.out_strategy = out_strategy
        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        block_id = device_id % need_dev_num0
        device_index = self.id_to_list(block_id, self.strategy1[1])
        print(device_index)
        for i in axis:
            device_index[i] = 0
        print(device_index)
        self.out_id = self.list_to_id(device_index, self.out_strategy)
        print(self.out_id)

    def id_to_list(self, id_, shape):
        result = []
        r = id_
        for i in range(0, len(shape)):
            v = 1
            for j in range(i + 1, len(shape)):
                v = v * shape[j]
            result.append(r // v)
            r = r % v
        return result

    def list_to_id(self, id_list, shape):
        result = 0
        for i in range(0, len(id_list)):
            v = 1
            for j in range(i + 1, len(id_list)):
                v = v * shape[j]
            result = result + id_list[i] * v
        return result

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        out_grad = Tensor(self.output_grad_np)
        net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, out_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                              parallel_inputs_run=[x1, y1, output_grad1])
        return input_grad

    def forward_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_reducemean_input_64x16():
    fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)),
                             strategy1=(0, (4,)))
    fact.forward_cmp()


def test_grad_reid_reducemean_input_64x16():
    fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)),
                             strategy1=(0, (4,)))
    fact.grad_cmp()


def test_reid_reducemean_input_64x128x28x28():
    fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
                             strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
    fact.forward_cmp()


def test_grad_reid_reducemean_input_64x128x28x28():
    fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3),
                             strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1)))
    fact.grad_cmp()
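

# --- Illustrative sketch (not part of the original test file) ---
# id_to_list / list_to_id above are plain mixed-radix conversions: the flat
# device id is decoded into per-axis coordinates under strategy1, the
# coordinates of the reduced axes are zeroed (all devices along a reduced axis
# hold the same output slice), and the result is re-encoded under out_strategy
# to obtain out_id. The strategy values below are illustrative only and match
# the 64x128x32x32 case above.
def _demo_reducemean_out_id():
    strategy_in = (2, 1, 2, 1)   # slicing of the ReduceMean input
    out_strategy = (2, 1, 1, 1)  # keep_dims=True with axes (2, 3) reduced
    reduced_axes = (2, 3)
    for demo_device_id in range(4):
        # decode: flat id -> per-axis coordinate (same arithmetic as id_to_list)
        coords, rest = [], demo_device_id
        for i in range(len(strategy_in)):
            radix = 1
            for j in range(i + 1, len(strategy_in)):
                radix *= strategy_in[j]
            coords.append(rest // radix)
            rest %= radix
        for i in reduced_axes:
            coords[i] = 0
        # encode: coordinate -> flat id under out_strategy (same as list_to_id)
        out_id = 0
        for i in range(len(coords)):
            radix = 1
            for j in range(i + 1, len(coords)):
                radix *= out_strategy[j]
            out_id += coords[i] * radix
        print(demo_device_id, coords, out_id)  # devices 0,1 -> block 0; devices 2,3 -> block 1


if __name__ == "__main__":
    _demo_reducemean_out_id()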
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_reducemean_parallel_4p.py>../../log/test_reducemean_parallel_4p_log$i.log 2>&1 &
    cd ..
done
@@ -1,206 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
import pytest
from numpy import allclose as allclose_nparray

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class Reshape(Cell):
    def __init__(self, target_shape, strategy0=None, strategy1=None):
        super(Reshape, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.reshape = P.Reshape(strategy=strategy1)
        self.shape = tuple(target_shape)

    def construct(self, input1, input2):
        x = self.add(input1, input2)
        return self.reshape(x, self.shape)


class ReshapeFactory:
    def __init__(self, input_shape, target_shape, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
            np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
            np.float32)
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(1000, target_size)
        self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
                                         target_shape).astype(np.float32)
        self.target_shape = target_shape
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        out_strategy = [1] * len(target_shape)
        out_strategy[0] = strategy1[1][0]
        self.out_strategy = out_strategy

        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        self.out_id = device_id % need_dev_num1

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def forward_reshape_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        net = Reshape(self.target_shape)
        out = net(x, y)
        return out.asnumpy()

    def forward_reshape_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_reshape_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        net = Reshape(self.target_shape)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_reshape_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                              parallel_inputs_run=[x1, y1, output_grad1])
        return input_grad

    def forward_reshape_cmp(self):
        out_mindspore = self.forward_reshape_mindspore_impl()
        out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_reshape_cmp(self):
        input_grad_mindspore = self.grad_reshape_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


@pytest.mark.reid_forward
def test_reid_reshape_input_128x512x7x7_target_128x25088():
    fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
                          strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1)))
    fact.forward_reshape_cmp()


def test_reid_reshape_grad_input_128x512x7x7_target_128x25088():
    fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088),
                          strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1)))
    fact.grad_reshape_cmp()


@pytest.mark.reid_forward
def test_reid_reshape_input_128x64_target_128x64x1x1():
    fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)),
                          strategy1=(0, (2, 1)))
    fact.forward_reshape_cmp()


@pytest.mark.reid_grad
def test_reid_reshape_grad_input_128x64_target_128x64x1x1():
    fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)),
                          strategy1=(0, (2, 1)))
    fact.grad_reshape_cmp()
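

# --- Illustrative sketch (not part of the original test file) ---
# For the reshape case, out_strategy above keeps only the split of the first
# dimension (out_strategy[0] = strategy1[1][0], every other axis 1), so each
# device's parallel output is just an axis-0 slice of the stand-alone result.
# A NumPy-only sketch; the shapes and the 4-way split are chosen only for
# illustration.
def _demo_reshape_blocks():
    import numpy as np
    first_dim_split = 4                                    # plays the role of strategy1[1][0]
    full = np.arange(8 * 6, dtype=np.float32).reshape(8, 6)
    reshaped = full.reshape(8, 3, 2)                       # stand-alone reshape result
    blocks = np.split(reshaped, first_dim_split, axis=0)   # one candidate block per slice
    for demo_device_id in range(4):
        local = blocks[demo_device_id % first_dim_split]   # the block this device verifies
        print(demo_device_id, local.shape)                 # -> (2, 3, 2)


if __name__ == "__main__":
    _demo_reshape_blocks()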
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_reshape_parallel_4p.py>../../log/test_reshape_parallel_4p_log$i.log 2>&1 &
    cd ..
done
@@ -1,235 +0,0 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np
from numpy import allclose as allclose_nparray

import mindspore.communication.management as distributedTool
from mindspore import context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops.composite import grad_all_with_sens

device_num = 4
device_id = int(os.environ["RANK_ID"])
path = "./output/"


def setup_module():
    print("~~~~~~~~~~~set up~~~~~~~~~~~~~")
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(device_num=device_num, global_rank=device_id)
    distributedTool.init()
    distributedTool.create_group("0-3", [0, 1, 2, 3])
    print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~")


def teardown_module():
    print("~~~~~~~~~~~~tear down~~~~~~~~~~")


class Net(Cell):
    def __init__(self, perm_in, strategy0=None, strategy1=None):
        super(Net, self).__init__()
        self.add = P.TensorAdd(strategy=strategy0)
        self.transpose = P.Transpose(strategy=strategy1)
        self.perm_in = perm_in

    def construct(self, x, y):
        out = self.add(x, y)
        return self.transpose(out, self.perm_in)


class Grad(Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network

    def construct(self, x, y, output_grad):
        return grad_all_with_sens(self.network)(x, y, output_grad)


class TransposeFactory:
    def __init__(self, input_shape, perm_in, strategy0, strategy1):
        prefix = ""
        size = 1
        for s in input_shape:
            prefix = prefix + str(s)
            size = size * s
        self.prefix = prefix
        number_range = min(1000, size)
        self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(
            np.float32)
        self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype(
            np.float32)
        target_shape = self.input_np1.transpose(perm_in).shape
        target_size = 1
        for s in target_shape:
            target_size = target_size * s
        number_range = min(1000, target_size)
        self.target_shape = target_shape
        self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2,
                                         target_shape).astype(np.float32)
        self.perm_in = perm_in
        self.strategy0 = strategy0
        self.strategy1 = strategy1
        out_strategy = []
        for i in perm_in:
            out_strategy.append(strategy1[1][i])
        self.out_strategy = out_strategy
        need_dev_num0 = 1
        need_dev_num1 = 1
        for s in strategy0[1]:
            need_dev_num0 = need_dev_num0 * s
        for s in out_strategy:
            need_dev_num1 = need_dev_num1 * s
        self.x_id = device_id % need_dev_num0
        self.y_id = device_id % need_dev_num0
        device_index = self.id_to_list(device_id % need_dev_num1,
                                       self.strategy1[1])  # encoding to get the index before transpose
        device_index_transpose = []
        for i in perm_in:
            device_index_transpose.append(device_index[i])
        self.out_id = self.list_to_id(device_index_transpose, self.out_strategy)

    def get_parallel_blocks(self, input_, strategy):
        blocks = [input_]
        i = 0
        for stra in strategy:
            temp = []
            while len(blocks) > 0:
                block = blocks.pop(0)
                temp.extend(np.split(block, stra, axis=i))
            blocks.extend(temp)
            i += 1
        return blocks

    def id_to_list(self, id_, shape):
        result = []
        r = id_
        for i in range(0, len(shape)):
            v = 1
            for j in range(i + 1, len(shape)):
                v = v * shape[j]
            result.append(r // v)
            r = r % v
        return result

    def list_to_id(self, id_list, shape):
        result = 0
        for i in range(0, len(id_list)):
            v = 1
            for j in range(i + 1, len(id_list)):
                v = v * shape[j]
            result = result + id_list[i] * v
        return result

    def forward_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        net = Net(self.perm_in)
        out = net(x, y)
        return out.asnumpy()

    def forward_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        net.set_auto_parallel()
        out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1])
        return out.asnumpy()

    def grad_mindspore_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        net = Net(self.perm_in)
        grad_net = Grad(net)
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad)
        return input_grad

    def grad_mindspore_parallel_impl(self):
        x = Tensor(self.input_np1)
        y = Tensor(self.input_np2)
        output_grad = Tensor(self.output_grad_np)
        inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1])
        inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1])
        outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy)
        x1 = Tensor(inputs_x[self.x_id])
        y1 = Tensor(inputs_y[self.y_id])
        output_grad1 = Tensor(outgrads[self.out_id])
        net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1)
        grad_net = Grad(net)
        context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
        grad_net.set_auto_parallel()
        grad_net.set_train()
        input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1],
                              parallel_inputs_run=[x1, y1, output_grad1])
        return input_grad

    def forward_transpose_cmp(self):
        out_mindspore = self.forward_mindspore_impl()
        out_mindspore_parallel = self.forward_mindspore_parallel_impl()
        out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy)
        assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001)

    def grad_transpose_cmp(self):
        input_grad_mindspore = self.grad_mindspore_impl()
        input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl()
        input_grad_mindspore0 = input_grad_mindspore[0].asnumpy()
        input_grad_mindspore1 = input_grad_mindspore[1].asnumpy()
        input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy()
        input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy()
        input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1])
        input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2])
        assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001)
        assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001)


def test_reid_transpose_input_256x512_output_512x256_perm_1x0():
    fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
    fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0():
    fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2)))
    fact.grad_transpose_cmp()


def test_reid_transpose_input_512x256_output_256x512_perm_1x0():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4)))
    fact.grad_transpose_cmp()


def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
    fact.forward_transpose_cmp()


def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat():
    fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1)))
    fact.grad_transpose_cmp()
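

# --- Illustrative sketch (not part of the original test file) ---
# For transpose, the output layout is the input layout with its axes permuted:
# out_strategy above is strategy1 reordered by perm_in, and out_id is the device
# coordinate decoded under strategy1, permuted by perm_in, then re-encoded under
# out_strategy (the same id_to_list / list_to_id arithmetic). The 2x2 strategy
# below is illustrative only and matches the 256x512 case above.
def _demo_transpose_out_id():
    strategy_in = (2, 2)
    perm_in = (1, 0)
    out_strategy = tuple(strategy_in[i] for i in perm_in)
    for demo_device_id in range(4):
        row, col = divmod(demo_device_id, strategy_in[1])   # decode under strategy_in
        coord_t = (col, row)                                 # apply perm_in
        out_id = coord_t[0] * out_strategy[1] + coord_t[1]   # re-encode under out_strategy
        print(demo_device_id, (row, col), coord_t, out_id)   # 0->0, 1->2, 2->1, 3->3


if __name__ == "__main__":
    _demo_transpose_out_id()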
@@ -1,27 +0,0 @@
#!/bin/bash
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

for((i=0;i<4;i++));
do
    rm -rf device$i
    mkdir device$i
    cd device$i
    mkdir output
    source ../../dist_env_4p.sh $i
    env >log$i.log
    pytest -s ../test_transpose_parallel_4p.py>../../log/test_transpose_parallel_4p_log$i.log 2>&1 &
    cd ..
done