From 1e6ee83874c3f1335f28ee622ea7bf3f326ef163 Mon Sep 17 00:00:00 2001
From: Yi Huaijie
Date: Fri, 22 May 2020 17:35:18 +0800
Subject: [PATCH] delete parallel end-to-end test cases

---
 .../add_relu/_test_add_relu_parallel_4p.py | 178 ---------
 .../add_relu/add_relu_parallel_4p.sh | 27 --
 .../_test_conv2d_parallel_4p.py | 356 ------------------
 .../batch_parallel/conv2d_parallel_4p.sh | 27 --
 .../parallel_end_to_end/dist_env_4p.sh | 36 --
 .../dropout/_test_dropout_parallel_4p.py | 120 ------
 .../dropout/dropout_parallel_4p.sh | 27 --
 .../hcom/_test_allgather_4p.py | 154 --------
 .../hcom/_test_allreduce_4p.py | 175 ---------
 .../parallel_end_to_end/hcom/allgather_4p.sh | 27 --
 .../parallel_end_to_end/hcom/allreduce_4p.sh | 27 --
 .../_test_l2normalize_parallel_4p.py | 206 ----------
 .../l2normalize/l2normalize_parallel_4p.sh | 27 --
 .../parallel_end_to_end/log/README.MD | 1 -
 .../loss/_test_loss_parallel_4p.py | 195 ----------
 .../loss/loss_parallel_4p.sh | 27 --
 .../matmul/_test_matmul_parallel_4p.py | 329 ----------------
 .../matmul/matmul_parallel_4p.sh | 27 --
 .../max/_test_max_parallel_4p.py | 213 -----------
 .../max/max_parallel_4p.sh | 27 --
 .../mul_softmax/mul_activation_parallel_4p.sh | 27 --
 .../need_fix_test_mul_softmax_parallel_4p.py | 200 ----------
 .../onehot/_test_onehot_parallel_4p.py | 147 --------
 .../onehot/onehot_parallel_4p.sh | 27 --
 .../prelu/_test_prelu_parallel_4p.py | 206 ----------
 .../prelu/prelu_parallel_4p.sh | 27 --
 .../_test_reducemean_parallel_4p.py | 252 -------------
 .../reducemean/reducemean_parallel_4p.sh | 27 --
 .../reshape/_test_reshape_parallel_4p.py | 206 ----------
 .../reshape/reshape_parallel_4p.sh | 27 --
 .../transpose/_test_transpose_parallel_4p.py | 235 ------------
 .../transpose/transpose_parallel_4p.sh | 27 --
 32 files changed, 3614 deletions(-)
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/add_relu/add_relu_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/batch_parallel/conv2d_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/dist_env_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/dropout/dropout_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/hcom/allgather_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/hcom/allreduce_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/l2normalize/l2normalize_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/log/README.MD
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/loss/loss_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/matmul/matmul_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/max/max_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/mul_softmax/mul_activation_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/onehot/onehot_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/prelu/prelu_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/reducemean/reducemean_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/reshape/reshape_parallel_4p.sh
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py
 delete mode 100644 tests/ut/python/parallel/parallel_end_to_end/transpose/transpose_parallel_4p.sh

diff --git a/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
deleted file mode 100644
index b1a4423b323..00000000000
--- a/tests/ut/python/parallel/parallel_end_to_end/add_relu/_test_add_relu_parallel_4p.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import os -import numpy as np -import pytest - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class AddRelu(Cell): - def __init__(self, strategy0=None, strategy1=None): - super(AddRelu, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.relu = P.ReLU(strategy=strategy1) - - def construct(self, x, z): - out = self.add(x, z) - return self.relu(out) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class AddReluFactory: - def __init__(self, input_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = 1.0 - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in strategy1[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def forward_mindspore_impl(self): - net = AddRelu() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) - output_grad = Tensor(output_grads[self.out_id]) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - 
grad_net.set_auto_parallel() - grad_net.set_train() - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], - parallel_inputs_run=[x1, y1, output_grad]) - return input_grad - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - _ = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - _ = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_forward -def test_reid_add_relu_input_256_64(): - stra0 = (0, (2, 2), ()) - stra1 = (0, (2, 2)) - fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_add_relu_input_256_64(): - stra0 = (0, (2, 2), ()) - stra1 = (0, (2, 2)) - fact = AddReluFactory(input_shape=(256, 64), strategy0=stra0, strategy1=stra1) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/add_relu/add_relu_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/add_relu/add_relu_parallel_4p.sh deleted file mode 100644 index 1b9df575a70..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/add_relu/add_relu_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_add_relu_parallel_4p.py>../../log/test_add_relu_parallel_4p_log$i.log 2>&1 & - cd .. 
-done diff --git a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py deleted file mode 100644 index b492312da63..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/_test_conv2d_parallel_4p.py +++ /dev/null @@ -1,356 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -from numpy import allclose - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore._checkparam import check_bool, twice -from mindspore.common.initializer import initializer -from mindspore.common.parameter import Parameter -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class _Conv(Cell): - r"""Applies a N-D convolution over an input signal composed of several input - planes. 
- """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - pad_mode, - padding, - dilation, - group, - has_bias, - weight_init, - bias_init): - super(_Conv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.pad_mode = pad_mode - self.padding = padding - self.dilation = dilation - self.group = group - self.has_bias = has_bias - if not (isinstance(in_channels, int) and in_channels > 0): - raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op passed ' - + str(in_channels) + ', should be a int and greater than 0.') - if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ - (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ - kernel_size[0] < 1 or kernel_size[1] < 1: - raise ValueError('Attr \'kernel_size\' of \'Conv2D\' Op passed ' - + str(self.kernel_size) + ', should be a int or tuple and equal to or greater than 1.') - if in_channels % group != 0: - raise ValueError('Attr \'in_channels\' of \'Conv2D\' Op must be divisible by ' - 'attr \'group\' of \'Conv2D\' Op.') - if out_channels % group != 0: - raise ValueError('Attr \'out_channels\' of \'Conv2D\' Op must be divisible by ' - 'attr \'group\' of \'Conv2D\' Op.') - - self.weight = Parameter(initializer( - weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight') - - if check_bool(has_bias): - self.bias = Parameter(initializer( - bias_init, [out_channels]), name='bias') - else: - if bias_init != 'zeros': - print("Value of 'has_bias' is False, value of 'bias_init' will be ignored.") - self.bias = None - - def construct(self, *inputs): - raise NotImplementedError - - -class Conv2d(_Conv): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - pad_mode='same', - padding=0, - dilation=1, - group=1, - has_bias=False, - weight_init='normal', - bias_init='zeros', - strategy=None): - kernel_size = twice(kernel_size) - super(Conv2d, self).__init__( - in_channels, - out_channels, - kernel_size, - stride, - pad_mode, - padding, - dilation, - group, - has_bias, - weight_init, - bias_init) - self.add = P.TensorAdd(strategy) - self.conv2d = P.Conv2D(out_channel=self.out_channels, - kernel_size=self.kernel_size, - mode=1, - pad_mode=self.pad_mode, - pad=self.padding, - stride=self.stride, - dilation=self.dilation, - group=self.group, - strategy=None) - self.bias_add = P.BiasAdd() - - def construct(self, input1, input2): - x = self.add(input1, input2) - if self.has_bias: - return self.bias_add(self.conv2d(x, self.weight), - self.bias) - return self.conv2d(x, self.weight) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input1, input2, output_grad): - return grad_all_with_sens(self.network)(input1, input2, output_grad) - - -class Conv2dFactory: - def __init__(self, input_shape, filter_shape, stride, pad_mode, padding, dilation, group, has_bias): - self.in_n, self.in_c, self.in_h, self.in_w = input_shape - self.out_c, self.kernel_c, self.kernel_h, self.kernel_w = filter_shape - self.stride = stride - self.pad_mode = pad_mode - self.padding = padding - self.dilation = dilation - self.group = group - self.strategy0 = (0, (4, 1, 1, 1), (1, 1, 1, 1)) - prefix = "" - input_size = 1 - filter_size = 1 - for s in input_shape: - prefix = prefix + str(s) + "_" - input_size = input_size * s - self.prefix = prefix - for s in filter_shape: - filter_size = filter_size 
* s - number_range1 = min(10, input_size) - number_range2 = min(10, filter_size) - self.input_np1 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 2, input_shape).astype( - np.float16) - self.input_np2 = np.reshape(np.arange(0, input_size) % number_range1 - number_range1 / 4, input_shape).astype( - np.float16) - self.weight_np = np.reshape(np.arange(0, filter_size) % number_range2 - number_range2 / 2, filter_shape).astype( - np.float16) - self.has_bias = has_bias - if self.has_bias is True: - self.bias_np = np.arange(0, self.out_c).astype(np.float16) - - self.out_shape = (128, 64, 56, 56) - out_size = 1 - for s in self.out_shape: - out_size = out_size * s - number_range3 = min(10, out_size) - self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range3 - number_range3 / 2, - self.out_shape).astype(np.float16) - self.x_id = device_id % 4 - self.y_id = device_id % 4 - self.out_strategy = self.strategy0[1] - self.out_id = device_id % 4 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_conv2d_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight) - out = net(input1, input2) - return out.asnumpy() - - def forward_conv2d_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight, - strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_conv2d_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - output_grad = Tensor(self.output_grad_np) - if self.has_bias: - bias = Tensor(self.bias_np) - 
net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias,) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight) - - grad_net = Grad(net) - grad_net.set_train() - out_grad = grad_net(x, y, output_grad) - return out_grad - - def grad_conv2d_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - weight = Tensor(self.weight_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad = Tensor(self.output_grad_np) - output_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - output_grad1 = Tensor(output_grads[self.out_id]) - if self.has_bias: - bias = Tensor(self.bias_np) - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=True, weight_init=weight, - bias_init=bias, strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - else: - net = Conv2d(in_channels=self.in_c, out_channels=self.out_c, - kernel_size=(self.kernel_h, self.kernel_w), - stride=self.stride, pad_mode=self.pad_mode, - padding=self.padding, dilation=self.dilation, - group=self.group, has_bias=False, weight_init=weight, - strategy=(self.strategy0[0], self.strategy0[1], self.strategy0[1])) - - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_train() - grad_net.set_auto_parallel() - out_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return out_grad - - def forward_conv2d_cmp(self): - out_mindspore = self.forward_conv2d_mindspore_impl() - out_mindspore_parallel = self.forward_conv2d_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_conv2d_cmp(self): - input_grad_mindspore = self.grad_conv2d_mindspore_impl() - input_grad_mindspore_parallel = self.grad_conv2d_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[1]) - assert allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.001, 0.001) - assert allclose(input_grad_blocks_1[self.x_id], input_grad_mindspore_parallel1, 0.001, 0.001) - - -def test_reid_conv2d_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): - fact = Conv2dFactory(input_shape=(128, 64, 112, 112), - filter_shape=(64, 64, 1, 1), - stride=2, 
pad_mode='valid', padding=0, - dilation=1, group=1, has_bias=False) - fact.forward_conv2d_cmp() - - -def test_reid_conv2d_grad_input_128_64_112_112_kernel_64_64_1_1_stride_2_padding_0_bias_true(): - fact = Conv2dFactory(input_shape=(128, 64, 112, 112), - filter_shape=(64, 64, 1, 1), - stride=2, pad_mode='valid', padding=0, - dilation=1, group=1, has_bias=False) - fact.grad_conv2d_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/conv2d_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/conv2d_parallel_4p.sh deleted file mode 100644 index 720b912a4bc..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/batch_parallel/conv2d_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_conv2d_parallel_4p.py>../../log/test_conv2d_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/dist_env_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/dist_env_4p.sh deleted file mode 100644 index 283c3c12de4..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/dist_env_4p.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================ - -export SLOG_PRINT_TO_STDOUT=1 -source /root/miniconda3/bin/activate ci3.6 -export RANK_SIZE=4 -export RANK_TABLE_FILE=../../rank_table_4p.json -export RANK_ID=$1 -export DEVICE_ID=$1 -export HCCL_FLAG=1 -export DEPLOY_MODE=0 -export AICPU_FLAG=1 -export DUMP_OP=1 -export PYTHONPATH=../../../../../../../../mindspore:/usr/local/HiAI/runtime/python3.6/site-packages/topi.egg/:/usr/local/HiAI/runtime/python3.6/site-packages/te.egg/:/usr/local/HiAI/runtime/ops/op_impl/built-in/ai_core/tbe/ -export OPTION_EXEC_EXTERN_PLUGIN_PATH=/usr/local/HiAI/runtime/lib64/libhccl.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so -export LD_LIBRARY_PATH=/usr/local/HiAI/runtime/lib64 -export FE_FLAG=1 -export PATH=/usr/local/HiAI/runtime/ccec_compiler/bin:$PATH -if [ $1 -eq 0 ]; -then - export DUMP_GE_GRAPH=true - export ME_DRAW_GRAPH=1 -fi diff --git a/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py deleted file mode 100644 index 41991aac74e..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/dropout/_test_dropout_parallel_4p.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.nn import Dropout - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Net(Cell): - def __init__(self, keep_prob, seed0, seed1, strategy=None): - super(Net, self).__init__() - self.drop = Dropout(keep_prob, seed0, seed1, dtype=ms.float32, strategy=strategy) - - def construct(self, input_): - x = self.drop(input_) - return x - - -# pylint: disable=comparison-with-itself -class DropoutFactory: - def __init__(self, input_shape, keep_prob, seed0, seed1, strategy0=None): - size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(10, size) - self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.float32) - self.keep_prob = keep_prob - self.seed0 = seed0 - self.seed1 = seed1 - self.strategy0 = strategy0 - need_dev_num = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - self.x_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def d4_tensor_compare(self, input_, out_me): - [a, b, c, d] = input_.shape - for i in range(a): - for j in range(b): - for k in range(c): - for e in range(d): - if out_me[i, j, k, e] == 0: - assert True - else: - assert np.allclose(out_me[i, j, k, e], input_[i, j, k, e] * (1 / 0.4), 0.0001, 0.0001) - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np) - inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - net = Net(0.4, 0, 0, strategy=self.strategy0) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) - return out.asnumpy() - - def forward_cmp(self): - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - input_blocks = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - self.d4_tensor_compare(input_blocks[self.out_id], out_mindspore_parallel) - - -def test_reid_dropout_forward_seed_F32_64_512_8_8(): - fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (4, 1, 1, 1))) - fact.forward_cmp() - - -def test_reid_dropout_forward_seed_F32_64_512_8_8_repeat(): - fact = DropoutFactory(input_shape=(64, 512, 8, 8), keep_prob=0.4, seed0=0, seed1=0, strategy0=(0, (2, 1, 1, 1))) - fact.forward_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/dropout/dropout_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/dropout/dropout_parallel_4p.sh deleted file mode 100644 index 8bd6cdc350e..00000000000 --- 
a/tests/ut/python/parallel/parallel_end_to_end/dropout/dropout_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_dropout_parallel_4p.py>../../log/test_dropout_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py deleted file mode 100644 index 893ee1cd38b..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allgather_4p.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MatmulSingle(Cell): - def __init__(self, transpose_a=False, transpose_b=False): - super(MatmulSingle, self).__init__() - self.matmul = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - - def construct(self, x, y): - out = self.matmul(x, y) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - return out - - -class MatmulAllgather(Cell): - def __init__(self, group, transpose_a=False, transpose_b=False): - super(MatmulAllgather, self).__init__() - self.allgather = P.AllGather(group=group) - self.matmul = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - self.allreduce = P.AllReduce(group=group) - - def construct(self, x, y): - x = self.allgather(x) - out = self.matmul(x, y) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - out = self.allreduce(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, sens): - return grad_all_with_sens(self.network)(x, y, sens) - - -class MatmulAllgatherFactory: - def __init__(self, inputx_shape, inputy_shape, x_stra, y_stra): - self.inputx = self.gen_value(inputx_shape, 10) - self.inputy = self.gen_value(inputy_shape, 20) - self.x_stra = x_stra - self.y_stra = y_stra - stra_size = 1 - for s in x_stra: - stra_size = stra_size * s - self.stra_size = stra_size - - def gen_value(self, input_shape, delta): - size = 1 - for s in input_shape: - size = size * s - number_range = min(100, size) - input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) - return input_np - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl_single(self): - x = Tensor(self.inputx) - y = Tensor(self.inputy) - sens = Tensor(1.0, dtype=ms.float32) - net = MatmulSingle() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, sens) - return input_grad - - def grad_mindspore_impl_reduce(self): - inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) - inputys = self.get_parallel_blocks(self.inputy, self.y_stra) - x = Tensor(inputxs[device_id % self.stra_size]) - y = Tensor(inputys[device_id % self.stra_size]) - repeat_num = device_num / self.stra_size - v = self.stra_size * repeat_num * repeat_num * repeat_num - sens = Tensor(1.0 / v, dtype=ms.float32) - net = MatmulAllgather("hccl_world_group") - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, sens) - return input_grad - - def grad_cmp(self): - single_results = 
self.grad_mindspore_impl_single() - reduce_results = self.grad_mindspore_impl_reduce() - single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] - reduce_result0 = reduce_results[0].asnumpy() - single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] - reduce_result1 = reduce_results[1].asnumpy() - assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) - assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) - - -def test_reduce_grad(): - inputx_shape = (64, 32) - inputy_shape = (32, 64) - fact = MatmulAllgatherFactory(inputx_shape, inputy_shape, (4, 1), (1, 4)) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py b/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py deleted file mode 100644 index 6ea30fac2d4..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/hcom/_test_allreduce_4p.py +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MatmulSingle(Cell): - def __init__(self, transpose_a=False, transpose_b=False): - super(MatmulSingle, self).__init__() - self.matmul1 = P.MatMul(transpose_a, transpose_b) - self.matmul2 = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - - def construct(self, x, y, z): - out = self.matmul1(x, y) - out = self.matmul2(out, z) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - return out - - -class MatmulReduce(Cell): - def __init__(self, group, transpose_a=False, transpose_b=False): - super(MatmulReduce, self).__init__() - self.matmul1 = P.MatMul(transpose_a, transpose_b) - self.allreduce1 = P.AllReduce(group=group) - self.matmul2 = P.MatMul(transpose_a, transpose_b) - self.pow = P.Pow() - self.reduce_sum = P.ReduceSum() - self.allreduce2 = P.AllReduce(group=group) - - def construct(self, x, y, z): - out = self.matmul1(x, y) - out = self.allreduce1(out) - out = self.matmul2(out, z) - out = self.pow(out, 2.0) - out = self.reduce_sum(out, None) - out = self.allreduce2(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - 
self.network = network - - def construct(self, x, y, z, sens): - return grad_all_with_sens(self.network)(x, y, z, sens) - - -class MatmulReduceFactory: - def __init__(self, inputx_shape, inputy_shape, inputz_shape, x_stra, y_stra, z_stra): - self.inputx = self.gen_value(inputx_shape, 10) - self.inputy = self.gen_value(inputy_shape, 20) - self.inputz = self.gen_value(inputz_shape, 30) - self.x_stra = x_stra - self.y_stra = y_stra - self.z_stra = z_stra - stra_size = 1 - for s in x_stra: - stra_size = stra_size * s - self.stra_size = stra_size - - def gen_value(self, input_shape, delta): - size = 1 - for s in input_shape: - size = size * s - number_range = min(100, size) - input_np = np.reshape(np.arange(0, size) % number_range - delta, input_shape).astype(np.float32) - return input_np - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl_single(self): - x = Tensor(self.inputx) - y = Tensor(self.inputy) - z = Tensor(self.inputz) - sens = Tensor(1.0, dtype=ms.float32) - net = MatmulSingle() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, z, sens) - return input_grad - - def grad_mindspore_impl_reduce(self): - inputxs = self.get_parallel_blocks(self.inputx, self.x_stra) - inputys = self.get_parallel_blocks(self.inputy, self.y_stra) - inputzs = self.get_parallel_blocks(self.inputz, self.z_stra) - x = Tensor(inputxs[device_id % self.stra_size]) - y = Tensor(inputys[device_id % self.stra_size]) - z = Tensor(inputzs[device_id % self.stra_size]) - repeat_num = device_num / self.stra_size - v = self.stra_size * repeat_num * repeat_num * repeat_num - sens = Tensor(1.0 / v, dtype=ms.float32) - net = MatmulReduce("hccl_world_group") - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, z, sens) - return input_grad - - def grad_cmp(self): - single_results = self.grad_mindspore_impl_single() - reduce_results = self.grad_mindspore_impl_reduce() - single_result0 = self.get_parallel_blocks(single_results[0].asnumpy(), self.x_stra)[device_id % self.stra_size] - reduce_result0 = reduce_results[0].asnumpy() - single_result1 = self.get_parallel_blocks(single_results[1].asnumpy(), self.y_stra)[device_id % self.stra_size] - reduce_result1 = reduce_results[1].asnumpy() - single_result2 = self.get_parallel_blocks(single_results[2].asnumpy(), self.z_stra)[device_id % self.stra_size] - reduce_result2 = reduce_results[2].asnumpy() - assert np.allclose(single_result0, reduce_result0, 0.0001, 0.0001) - assert np.allclose(single_result1, reduce_result1, 0.0001, 0.0001) - assert np.allclose(single_result2, reduce_result2, 0.0001, 0.0001) - - -def test_reduce_grad(): - inputx_shape = (32, 64) - inputy_shape = (64, 64) - inputz_shape = (64, 32) - fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 4), (4, 1), (1, 4)) - fact.grad_cmp() - - -def test_reduce_grad_repeat(): - inputx_shape = (32, 64) - inputy_shape = (64, 64) - inputz_shape = (64, 32) - fact = MatmulReduceFactory(inputx_shape, inputy_shape, inputz_shape, (1, 2), (2, 1), (1, 2)) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/hcom/allgather_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/hcom/allgather_4p.sh deleted file mode 100644 index c44a6e1e57a..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/hcom/allgather_4p.sh 
+++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_allgather_4p.py>../../log/test_allgather_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/hcom/allreduce_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/hcom/allreduce_4p.sh deleted file mode 100644 index 93bdddebcca..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/hcom/allreduce_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_allreduce_4p.py>../../log/test_allreduce_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py deleted file mode 100644 index 866efdeba7d..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/_test_l2normalize_parallel_4p.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class L2normalize(Cell): - def __init__(self, axis=0, epsilon=1e-4, strategy0=None, strategy1=None): - super(L2normalize, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.l2norm = P.L2Normalize(axis, epsilon, strategy1) - - def construct(self, x, y): - out = self.add(x, y) - out = self.l2norm(out) - return out - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class L2normalizeFactory: - def __init__(self, input_shape, axis, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = input_shape - self.target_shape = target_shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.axis = axis - self.epsilon = 1e-4 - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = strategy1[1] - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = L2normalize(self.axis, self.epsilon) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, 
parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = L2normalize(self.axis, self.epsilon) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = L2normalize(self.axis, self.epsilon, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_l2normalize_input_128_512(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.forward_cmp() - - -def test_reid_l2normalize_grad_input_128_512(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, (0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.grad_cmp() - - -def test_reid_l2normalize_input_128_512_repeat(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) - fact.forward_cmp() - - -def test_reid_l2normalize_grad_input_128_512_repeat(): - input_shape = (128, 512) - axis = 0 - fact = L2normalizeFactory(input_shape, axis, strategy0=(0, (1, 2), (1, 2)), strategy1=(0, (1, 2))) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/l2normalize_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/l2normalize/l2normalize_parallel_4p.sh deleted file mode 100644 index 66bcdf8f200..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/l2normalize/l2normalize_parallel_4p.sh +++ /dev/null @@ 
-1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_l2normalize_parallel_4p.py>../../log/test_l2normalize_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/log/README.MD b/tests/ut/python/parallel/parallel_end_to_end/log/README.MD deleted file mode 100644 index c4d4042ded0..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/log/README.MD +++ /dev/null @@ -1 +0,0 @@ -log files for auto parallel end to end test cases diff --git a/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py deleted file mode 100644 index 6b8288e4bda..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/loss/_test_loss_parallel_4p.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class AddRelu(Cell): - def __init__(self, strategy0=None, strategy1=None): - super(AddRelu, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.relu = P.ReLU(strategy=strategy1) - - def construct(self, x, y): - out = self.add(x, y) - out = self.relu(out) - return out - - -class NetWithLoss(Cell): - def __init__(self, network, strategy2=None): - super(NetWithLoss, self).__init__() - self.loss = P.SoftmaxCrossEntropyWithLogits(strategy=strategy2) - self.network = network - - def construct(self, x, y, b): - predict = self.network(x, y) - return self.loss(predict, b)[0] - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, b): - return grad_all(self.network)(x, y, b) - - -class AddReluFactory: - def __init__(self, input_shape, strategy0, strategy1, strategy2): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = input_shape - self.target_shape = target_shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(10, target_size) - self.output_grad_np = np.reshape((np.arange(0, target_size) % number_range) * 0.1, target_shape).astype( - np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - self.strategy2 = strategy2 - out_strategy = strategy1[1] - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = AddRelu() - net_with_loss = NetWithLoss(net) - grad_net = Grad(net_with_loss) - grad_net.set_train() - input_grads = [] - for i in range(0, 3): - input_grad = grad_net(x, y, output_grad) - input_grads.append(input_grad) - return input_grads - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - 
inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = AddRelu(strategy0=self.strategy0, strategy1=self.strategy1) - net_with_loss = NetWithLoss(net, strategy2=self.strategy2) - grad_net = Grad(net_with_loss) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grads = [] - for i in range(0, 3): - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], - parallel_inputs_run=[x1, y1, output_grad1]) - input_grads.append(input_grad) - return input_grads - - def grad_cmp(self): - input_grad_mindspores = self.grad_mindspore_impl() - input_grad_mindspore_parallels = self.grad_mindspore_parallel_impl() - for i in range(0, len(input_grad_mindspores)): - input_grad_mindspore = input_grad_mindspores[i] - input_grad_mindspore_parallel = input_grad_mindspore_parallels[i] - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single0.npy", - input_grad_blocks_0[self.x_id]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_single1.npy", - input_grad_blocks_1[self.y_id]) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", - input_grad_mindspore_parallel0) - np.save(path + str(i) + "_" + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", - input_grad_mindspore_parallel1) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_l2normalize_grad_input_128_512(): - input_shape = (128, 512) - fact = AddReluFactory(input_shape, strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (4, 1)), - strategy2=(0, (4, 1), (4, 1))) - fact.grad_cmp() - - -def test_reid_l2normalize_grad_input_128_512_stridesplit(): - input_shape = (128, 512) - fact = AddReluFactory(input_shape, strategy0=(0, (1, 1), (1, 1)), strategy1=(0, (4, 1)), - strategy2=(0, (4, 1), (4, 1))) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/loss/loss_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/loss/loss_parallel_4p.sh deleted file mode 100644 index d6e075f8852..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/loss/loss_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_loss_parallel_4p.py>../../log/test_loss_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py deleted file mode 100644 index 37ae0f72b06..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/matmul/_test_matmul_parallel_4p.py +++ /dev/null @@ -1,329 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -from numpy import allclose - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Matmul(Cell): - def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): - super(Matmul, self).__init__() - self.add = P.TensorAdd(strategy=strategy1) - self.matmul = P.MatMul(transpose_a, transpose_b, strategy=strategy0) - - def construct(self, x, w, z): - out = self.add(x, z) - return self.matmul(out, w) - - -class BatchMatMul(Cell): - def __init__(self, transpose_a=False, transpose_b=False, strategy0=None, strategy1=None): - super(BatchMatMul, self).__init__() - self.add = P.TensorAdd(strategy=strategy1) - self.batchmatmul = P.BatchMatMul(transpose_a, transpose_b, strategy=strategy0) - - def construct(self, x, w, z): - out = self.add(x, z) - return self.batchmatmul(out, w) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, inputa, inputb, inputz, output_grad): - gout = grad_all_with_sens(self.network)(inputa, inputb, inputz, output_grad) - return gout - - -class BatchmatmulFactory: - def __init__(self, inputa_shape, 
inputb_shape, transpose_a, transpose_b, strategy, strategy_): - self.strategy = strategy - self.strategy_ = strategy_ - inputa_size = 1 - inputb_size = 1 - prefix = "" - for s in inputa_shape: - prefix = prefix + str(s) + "_" - inputa_size = inputa_size * s - prefix = prefix + "and" - for s in inputb_shape: - prefix = prefix + str(s) + "_" - inputb_size = inputb_size * s - number_rangea = min(1000, inputa_size) - number_rangeb = min(1000, inputb_size) - self.inputa = np.reshape(np.arange(0, inputa_size) % number_rangea - number_rangea / 2, inputa_shape).astype( - np.float32) - self.inputb = np.reshape(np.arange(0, inputb_size) % number_rangeb - number_rangeb / 2, inputb_shape).astype( - np.float32) - self.inputz = np.zeros(self.inputa.shape).astype(np.float32) - self.transpose_a = transpose_a - self.transpose_b = transpose_b - - out_shape = [] - device_matrix = [] - out_strategy = [] - if transpose_a: - temp = inputa_shape[-1] - inputa_shape[-1] = inputa_shape[-2] - inputa_shape[-2] = temp - if transpose_b: - temp = inputb_shape[-1] - inputb_shape[-1] = inputb_shape[-2] - inputb_shape[-2] = temp - - if len(inputa_shape) >= len(inputb_shape): - out_shape = list(inputa_shape) - out_shape[-1] = inputb_shape[-1] - else: - out_shape = list(inputb_shape) - out_shape[-2] = inputa_shape[-2] - - strategy1 = list(self.strategy[1]) - strategy2 = list(self.strategy[2]) - if transpose_a: - temp = strategy1[-1] - strategy1[-1] = strategy1[-2] - strategy1[-2] = temp - if transpose_b: - temp = strategy2[-1] - strategy2[-1] = strategy2[-2] - strategy2[-2] = temp - - if len(strategy1) >= len(strategy2): - out_strategy = strategy1.copy() - out_strategy[-1] = strategy2[-1] - else: - out_strategy = strategy2.copy() - out_strategy[-2] = strategy1[-2] - device_matrix = out_strategy.copy() - device_matrix.insert(-1, strategy1[-1]) - self.out_strategy = out_strategy - - need_dev_num = 1 - for s in device_matrix: - need_dev_num = need_dev_num * s - self.need_dev_num = need_dev_num - self.device_matrix = device_matrix - - out_size = 1 - for s in out_shape: - out_size = out_size * s - number_range = min(1000, out_size) - self.output_grad_np = np.reshape(np.arange(0, out_size) % number_range - number_range / 2, out_shape).astype( - np.float32) - - device_index = self.id_to_list(device_id % need_dev_num, self.device_matrix) - x_index = device_index[:-1].copy() - if transpose_a: - temp = x_index[-1] - x_index[-1] = x_index[-2] - x_index[-2] = temp - y_index = device_index[:-3].copy() - y_index.append(device_index[-2]) - y_index.append(device_index[-1]) - if transpose_b: - temp = y_index[-1] - y_index[-1] = y_index[-2] - y_index[-2] = temp - - out_index = device_index[:-2].copy() - out_index.append(device_index[-1]) - - print(device_matrix) - print(device_index) - - need_dev_num_ = 1 - for s in strategy_[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num_ - self.y_id = self.list_to_id(y_index, self.strategy[2]) - self.out_id = self.list_to_id(out_index, self.out_strategy) - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - - def id_to_list(self, id_, shape): - """ - shape: the upper bound of each dimension, e.g. (2,4,8) - """ - result = [] - r = id_ - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, 
id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * shape[j] - result = result + id_list[i] * v - return result - - def forward_mindspore_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b) - else: - matmul = Matmul(self.transpose_a, self.transpose_b) - matmul.set_train() - out_me = matmul(Tensor(self.inputa), Tensor(self.inputb), Tensor(self.inputz)) - return out_me.asnumpy() - - def forward_mindspore_parallel_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - else: - matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) - ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) - zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) - x1 = Tensor(xs[self.x_id]) # - y1 = Tensor(ys[self.y_id]) # needs to be derived from the device matrix - z1 = Tensor(zs[self.x_id]) - matmul.set_train() - matmul.set_auto_parallel() - out_me = matmul(x, y, z, parallel_inputs_compile=[x, y, z], parallel_inputs_run=[x1, y1, z1]) - return out_me.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b) - else: - matmul = Matmul(self.transpose_a, self.transpose_b) - net_me = Grad(matmul) - net_me.set_train() - out_grad_me = Tensor(self.output_grad_np) - out_grad = net_me(x, y, z, out_grad_me) - return out_grad - - def grad_mindspore_parallel_impl(self): - if len(self.inputa.shape) > 2: - matmul = BatchMatMul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - else: - matmul = Matmul(self.transpose_a, self.transpose_b, strategy0=self.strategy, strategy1=self.strategy_) - x = Tensor(self.inputa) - y = Tensor(self.inputb) - z = Tensor(self.inputz) - out_grad_me = Tensor(self.output_grad_np) - - xs = self.get_parallel_blocks(self.inputa, self.strategy_[1]) - ys = self.get_parallel_blocks(self.inputb, self.strategy[2]) - zs = self.get_parallel_blocks(self.inputz, self.strategy_[1]) - out_grads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - - x1 = Tensor(xs[self.x_id]) # needs to be derived from the device matrix - y1 = Tensor(ys[self.y_id]) # - z1 = Tensor(zs[self.x_id]) - out_grad1 = Tensor(out_grads[self.out_id]) - net_me = Grad(matmul) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net_me.set_auto_parallel() - net_me.set_train() - - out_grad = net_me(x, y, z, out_grad_me, parallel_inputs_compile=[x, y, z, out_grad1], - parallel_inputs_run=[x1, y1, z1, out_grad1]) - return out_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspores = self.get_parallel_blocks(out_mindspore, self.out_strategy) - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - assert allclose(out_mindspores[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspores0 = self.get_parallel_blocks(input_grad_mindspore[0].asnumpy(), self.strategy_[1]) - 
input_grad_mindspores1 = self.get_parallel_blocks(input_grad_mindspore[1].asnumpy(), self.strategy[2]) - input_grad_mindspores2 = self.get_parallel_blocks(input_grad_mindspore[2].asnumpy(), self.strategy_[1]) - assert allclose(input_grad_mindspores0[self.x_id], input_grad_mindspore_parallel[0].asnumpy(), 0.0001, 0.0001) - assert allclose(input_grad_mindspores1[self.y_id], input_grad_mindspore_parallel[1].asnumpy(), 0.0001, 0.0001) - assert allclose(input_grad_mindspores2[self.x_id], input_grad_mindspore_parallel[2].asnumpy(), 0.0001, 0.0001) - - -def test_reid_batchmatmul_inputa_128_512_inputb_2000_512(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.forward_cmp() - - -def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (2, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.grad_cmp() - - -def test_reid_batchmatmul_inputa_128_512_inputb_2000_512_redistribution(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.forward_cmp() - - -def test_reid_batchmatmul_grad_inputa_128_512_inputb_2000_512_redistribution(): - inputa = [128, 512] - inputb = [2000, 512] - fact = BatchmatmulFactory(inputa, inputb, False, True, (0, (1, 2), (1, 2)), (0, (2, 2), (2, 2))) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/matmul/matmul_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/matmul/matmul_parallel_4p.sh deleted file mode 100644 index 5c58c0a1f2e..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/matmul/matmul_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_matmul_parallel_4p.py >../../log/test_matmul_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py deleted file mode 100644 index d4247f73191..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/max/_test_max_parallel_4p.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input1, input2, output_grad): - return grad_all_with_sens(self.network)(input1, input2, output_grad) - - -class Max(Cell): - def __init__(self, axis, keep_dims, strategy0=None, strategy1=None): - super(Max, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reduce_max = P.ReduceMax(keep_dims=keep_dims).set_strategy(strategy=strategy1) - self.axis = axis - - def construct(self, input1, input2): - out = self.add(input1, input2) - return self.reduce_max(out, self.axis) - - -class MaxFactory: - def __init__(self, input_shape, axis, keep_dims, strategy0, strategy1): - self.strategy0 = strategy0 - self.strategy1 = strategy1 - self.axis = axis - self.keep_dims = keep_dims - input_size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) + "_" - input_size = input_size * s - number_range = min(1000, input_size) - self.input_np1 = np.reshape(np.arange(0, input_size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = self.input_np1.copy() - self.out_grad_np = None - out_shape = list(input_shape) - out_shape.pop(axis) - out_size = input_size / input_shape[axis] - number_range_ = min(1000, out_size) - self.out_grad_np = np.reshape(np.arange(0, out_size) % number_range_ - number_range_ / 2, out_shape).astype( - np.float32) - out_strategy = list(strategy1[1]) - out_strategy.pop(axis) - self.out_strategy = out_strategy - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in out_strategy: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - net = Max(axis=self.axis, keep_dims=self.keep_dims) - out = net(input1, input2) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = 
Tensor(self.input_np2) - xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(xs[self.x_id]) - y1 = Tensor(ys[self.y_id]) - net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - input1 = Tensor(self.input_np1) - input2 = Tensor(self.input_np2) - out_grad = Tensor(self.out_grad_np) - net = Max(axis=self.axis, keep_dims=self.keep_dims) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(input1, input2, out_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grads = self.get_parallel_blocks(self.out_grad_np, self.out_strategy) - out_grad = Tensor(output_grads[self.out_id]) - xs = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - ys = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(xs[self.x_id]) - y1 = Tensor(ys[self.y_id]) - net = Max(axis=self.axis, keep_dims=self.keep_dims, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, out_grad, parallel_inputs_compile=[x, y, out_grad], - parallel_inputs_run=[x1, y1, out_grad]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - print(out_mindspore) - print(out_mindspore_parallel) - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_max_forward_input_256_64(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), - strategy1=(0, (4, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_256_64(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (4, 1), (4, 1)), - strategy1=(0, (4, 1))) - fact.grad_cmp() - - -def test_reid_max_forward_input_128_64_32_32(): - fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), - strategy1=(0, (2, 1, 2, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_128_64_32_32(): - fact = MaxFactory(input_shape=(128, 64, 32, 32), axis=3, keep_dims=False, strategy0=(0, 
(2, 1, 2, 1), (2, 1, 2, 1)), - strategy1=(0, (2, 1, 2, 1))) - fact.grad_cmp() - - -def test_reid_max_forward_input_256_64_repeat(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.forward_cmp() - - -def test_reid_max_grad_input_256_64_repeat(): - fact = MaxFactory(input_shape=(256, 64), axis=1, keep_dims=False, strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/max/max_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/max/max_parallel_4p.sh deleted file mode 100644 index bf44b717ccb..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/max/max_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_max_parallel_4p.py>../../log/test_max_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/mul_activation_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/mul_activation_parallel_4p.sh deleted file mode 100644 index af28b9c710e..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/mul_activation_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_mul_softmax_parallel_4p.py>../../log/test_mul_softmax_parallel_4p_log$i.log 2>&1 & - cd .. 
-done diff --git a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py deleted file mode 100644 index 19bf73f38ab..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/mul_softmax/need_fix_test_mul_softmax_parallel_4p.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import pytest - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class MulSoftmax(Cell): - def __init__(self, strategy0=None, strategy1=None, axis=0): - super(MulSoftmax, self).__init__() - self.mul = P.Mul(strategy=strategy0) - self.softmax = P.Softmax(axis=axis, strategy=strategy1) - - def construct(self, x, z): - out = self.mul(x, z) - return self.softmax(out) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class MulSoftmaxFactory: - def __init__(self, input_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = 1.0 - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.strategy0 = strategy0 - self.strategy1 = strategy1 - need_dev_num = 1 - need_dev_num_ = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - for s in strategy1[1]: - need_dev_num_ = need_dev_num_ * s - self.x_id = device_id % need_dev_num - self.y_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num_ - - def forward_mindspore_impl(self): - net = MulSoftmax() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) - 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = MulSoftmax() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy1[1]) - output_grad = Tensor(output_grads[self.out_id]) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = MulSoftmax(strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_train() - grad_net.set_auto_parallel() - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(self.input_np2, ms.float32) - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad], - parallel_inputs_run=[x1, y1, output_grad]) - return input_grad - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - np.save(path + str(device_id) + "_" + self.prefix + "_forward_parallel.npy", out_mindspore_parallel) - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy1[1]) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel0.npy", input_grad_mindspore_parallel0) - np.save(path + str(device_id) + "_" + self.prefix + "_grad_parallel1.npy", input_grad_mindspore_parallel1) - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, - self.strategy0[1]) # here, of TensorMul's two inputs, X1 is not broadcast while X2 is broadcast - assert np.allclose(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_mindspore1, input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_forward -def test_reid_mul_softmax_input_128x64(): - stra0 = (0, (1, 4), ()) - stra1 = (0, (1, 4)) - fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_mul_softmax_input_128x64(): - stra0 = (0, (1, 4), ()) - stra1 = (0, (1, 4)) - fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) - fact.grad_cmp() - - 
-@pytest.mark.reid_forward -def test_reid_mul_softmax_input_128x64_all_to_all(): - stra0 = (0, (4, 1), ()) - stra1 = (0, (1, 4)) - fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_mul_softmax_input_128x64_all_to_all(): - stra0 = (0, (4, 1), ()) - stra1 = (0, (1, 4)) - fact = MulSoftmaxFactory(input_shape=(128, 64), strategy0=stra0, strategy1=stra1) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py deleted file mode 100644 index 0648d769abc..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/onehot/_test_onehot_parallel_4p.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Onehot(Cell): - def __init__(self, axis=-1, depth=1, on_value=1.0, off_value=0.0, strategy=None): - super(Onehot, self).__init__() - self.onehot = P.OneHot(axis, strategy=strategy) - self.depth = depth - self.on_value = Tensor(on_value, ms.float32) - self.off_value = Tensor(off_value, ms.float32) - - def construct(self, indices): - return self.onehot(indices, self.depth, self.on_value, self.off_value) - - -class OneHotFactory: - def __init__(self, input_shape, depth, on_value=1.0, off_value=0.0, axis=None, dtype=None, strategy0=None): - size = 1 - prefix = "" - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(10, size) - self.input_np = np.reshape(np.arange(0, size) % number_range, input_shape).astype(np.int32) - self.depth = depth - self.on_value = on_value - self.off_value = off_value - self.axis = axis - self.dtype = dtype - self.strategy0 = strategy0 - need_dev_num = 1 - for s in strategy0[1]: - need_dev_num = need_dev_num * s - self.x_id = device_id % need_dev_num - self.out_id = device_id % need_dev_num - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def grad_mindspore_impl(self): 
- output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np1) - y = Tensor(self.input_np2, ms.float32) - net = AddRelu() - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def forward_mindspore_impl(self): - indices = Tensor(self.input_np) - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value) - out = net(indices) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np) - inputs_x = self.get_parallel_blocks(self.input_np, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - net = Onehot(axis=self.axis, - depth=self.depth, - on_value=self.on_value, - off_value=self.off_value, strategy=self.strategy0) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, parallel_inputs_compile=[x], parallel_inputs_run=[x1]) - return out.asnumpy() - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy0[1]) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.0001) - - -def test_reid_onehot_forward_int32_128_depth13000(): - fact = OneHotFactory(input_shape=(128,), - depth=131072, - on_value=1.000000, - off_value=0.000000, - axis=-1, - dtype="float32", - strategy0=(0, (2,))) - fact.forward_cmp() - - -def test_reid_onehot_forward_int32_131072_depth127(): - fact = OneHotFactory(input_shape=(131072,), - depth=127, - on_value=1.000000, - off_value=0.000000, - axis=-1, - dtype="float32", - strategy0=(0, (4,))) - fact.forward_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/onehot/onehot_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/onehot/onehot_parallel_4p.sh deleted file mode 100644 index ba68b342485..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/onehot/onehot_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_onehot_parallel_4p.py>../../log/test_onehot_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py deleted file mode 100644 index f0a45111bc4..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/prelu/_test_prelu_parallel_4p.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import pytest - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class PReLU(Cell): - def __init__(self, channel=1, w=0.25, strategy_=None, strategy1_=None): - super(PReLU, self).__init__() - self.add = P.TensorAdd(strategy=strategy1_) - self.prelu = P.PReLU(strategy=strategy_) - self.channel = channel - - def construct(self, x, z, w): - out = self.add(x, z) - return self.prelu(out, w) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, input_, z, w, output_grad): - return grad_all_with_sens(self.network)(input_, z, w, output_grad) - - -class PReLUFactory: - def __init__(self, input_shape, strategy): - n, c = input_shape[:2] - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype(np.float32) - self.output_grad_np = np.reshape((np.arange(0, size) % (number_range - 10) - number_range / 2) * 0.1, - input_shape).astype(np.float32) - self.channel = c - self.weight = np.array([np.float32(0.25)] * c) - self.strategy = strategy - - def forward_mindspore_impl(self): - net = PReLU(channel=self.channel, w=self.weight) - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - out = net(x, z, w) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, - strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - - inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) - block_id = device_id % len(inputs) - x1 = Tensor(inputs[block_id]) - z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) - w1 = Tensor(self.weight) - - out = net(x, z, w, parallel_inputs_compile=[x, z, w], parallel_inputs_run=[x1, z1, w1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - output_grad = Tensor(self.output_grad_np) - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = 
Tensor(self.weight) - - net = PReLU(channel=self.channel, w=self.weight) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, z, w, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - output_grads = self.get_parallel_blocks(self.output_grad_np, self.strategy[1]) - block_id = device_id % len(output_grads) - output_grad = Tensor(output_grads[block_id]) - x = Tensor(self.input_np) - z = Tensor(np.zeros(self.input_np.shape), ms.float32) - w = Tensor(self.weight) - - net = PReLU(channel=self.channel, w=self.weight, strategy_=self.strategy, - strategy1_=(self.strategy[0], self.strategy[1], self.strategy[1])) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - - grad_net.set_train() - inputs = self.get_parallel_blocks(self.input_np, self.strategy[1]) - x1 = Tensor(inputs[block_id]) - z1 = Tensor(np.zeros(inputs[block_id].shape), ms.float32) - w1 = Tensor(self.weight) - - input_grad = grad_net(x, z, w, output_grad, parallel_inputs_compile=[x, z, w, output_grad], - parallel_inputs_run=[x1, z1, w1, output_grad]) - return input_grad - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.strategy[1]) - block_id = device_id % len(out_blocks) - assert np.allclose(out_blocks[block_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore2 = input_grad_mindspore[2].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_mindspore_parallel2 = input_grad_mindspore_parallel[2].asnumpy() - input_grad_blocks = self.get_parallel_blocks(input_grad_mindspore0, self.strategy[1]) - input1_grad_blocks = self.get_parallel_blocks(input_grad_mindspore1, self.strategy[1]) - block_id = device_id % len(input_grad_blocks) - assert np.allclose(input_grad_blocks[block_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert np.allclose(input_grad_mindspore2, input_grad_mindspore_parallel2, 0.0001, 0.0001) - assert np.allclose(input1_grad_blocks[block_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_grad -def test_reid_prelu_input_128x64x112x112_repeat(): - stra = (0, (1, 1, 2, 1), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_prelu_input_128x64x112x112_repeat(): - stra = (0, (1, 1, 2, 1), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.grad_cmp() - - -@pytest.mark.reid_grad -def test_reid_prelu_input_128x64x112x112_mix(): - stra = (0, (2, 1, 1, 2), (1)) - fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.forward_cmp() - - -@pytest.mark.reid_grad -def test_reid_grad_prelu_input_128x64x112x112_mix(): - stra = (0, (2, 1, 1, 2), (1)) - 
fact = PReLUFactory(input_shape=(128, 64, 112, 112), strategy=stra) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/prelu/prelu_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/prelu/prelu_parallel_4p.sh deleted file mode 100644 index f58d3735d8c..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/prelu/prelu_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_prelu_parallel_4p.py >../../log/test_prelu_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py deleted file mode 100644 index 24a3227da78..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/reducemean/_test_reducemean_parallel_4p.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np -from numpy import allclose as allclose_nparray - -import mindspore as ms -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class GradScalar(Cell): - def __init__(self, network): - super(GradScalar, self).__init__() - self.network = network - self.sens = Tensor([1.0], dtype=ms.float32) - - def construct(self, x, y): - return grad_all_with_sens(self.network)(x, y, self.sens) - - -class ReduceMean(Cell): - def __init__(self, keep_dims, axis, strategy0=None, strategy1=None): - super(ReduceMean, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reduce_mean = P.ReduceMean(keep_dims=keep_dims).set_strategy(strategy=strategy1) - self.axis = axis - - def construct(self, x, y): - out = self.add(x, y) - return self.reduce_mean(out, self.axis) - - -class ReduceMeanFactory: - def __init__(self, input_shape, keep_dims, axis, strategy0=None, strategy1=None): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - self.keep_dims = keep_dims - self.axis = axis - target_shape = self.input_np1.mean(axis=axis, keepdims=keep_dims).shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.output_grad_np = np.array([1.0], dtype=np.float32) - if len(target_shape) > 0: - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range, target_shape).astype( - np.float32) + 1.0 - self.shape = target_shape - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [] - axis_ = list(axis) - if axis_[0] == -1: - axis_[0] = len(input_shape) - 1 - for i in range(0, len(input_shape)): - if i in axis_: - if keep_dims: - out_strategy.append(1) - else: - out_strategy.append(strategy1[1][i]) - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - block_id = device_id % need_dev_num0 - device_index = self.id_to_list(block_id, self.strategy1[1]) - print(device_index) - for i in axis: - device_index[i] = 0 - print(device_index) - self.out_id = self.list_to_id(device_index, self.out_strategy) - print(self.out_id) - - def id_to_list(self, id_, 
shape): - result = [] - r = id_ - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * shape[j] - result = result + id_list[i] * v - return result - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - out_grad = Tensor(self.output_grad_np) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, out_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = ReduceMean(keep_dims=self.keep_dims, axis=self.axis, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = 
self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_reducemean_input_64x16(): - fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), - strategy1=(0, (4,))) - fact.forward_cmp() - - -def test_grad_reid_reducemean_input_64x16(): - fact = ReduceMeanFactory(input_shape=(64 * 16,), keep_dims=False, axis=(-1,), strategy0=(0, (4,), (4,)), - strategy1=(0, (4,))) - fact.grad_cmp() - - -def test_reid_reducemean_input_64x128x28x28(): - fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), - strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) - fact.forward_cmp() - - -def test_grad_reid_reducemean_input_64x128x28x28(): - fact = ReduceMeanFactory(input_shape=(64, 128, 32, 32), keep_dims=True, axis=(2, 3), - strategy0=(0, (2, 1, 2, 1), (2, 1, 2, 1)), strategy1=(0, (2, 1, 2, 1))) - fact.grad_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/reducemean/reducemean_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/reducemean/reducemean_parallel_4p.sh deleted file mode 100644 index b78a5c2b6f8..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/reducemean/reducemean_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_reducemean_parallel_4p.py>../../log/test_reducemean_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py deleted file mode 100644 index cbfdd511d7e..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/reshape/_test_reshape_parallel_4p.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
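The _test_reducemean_parallel_4p.py file deleted above derives each device's data from its rank: get_parallel_blocks splits the full array along every sharded axis in strategy order, id_to_list/list_to_id convert between a flat rank and per-axis block coordinates, and for ReduceMean the coordinates of the reduced axes are zeroed before re-encoding, which gives the index of the device's expected output slice. A standalone NumPy sketch of that bookkeeping (the (4, 8) array, the (2, 2) strategy, and the example rank are illustrative values, not taken from the deleted file):

import numpy as np


def get_parallel_blocks(input_, strategy):
    # Recursive per-axis split, mirroring the helper in the deleted tests:
    # after processing axis i the blocks are ordered by flat device rank.
    blocks = [input_]
    for axis, stra in enumerate(strategy):
        temp = []
        while blocks:
            temp.extend(np.split(blocks.pop(0), stra, axis=axis))
        blocks = temp
    return blocks


def id_to_list(rank, shape):
    # Decode a flat rank into per-axis block coordinates (row-major).
    coords, r = [], rank
    for i in range(len(shape)):
        stride = int(np.prod(shape[i + 1:], dtype=np.int64))
        coords.append(r // stride)
        r %= stride
    return coords


def list_to_id(coords, shape):
    # Inverse of id_to_list: re-encode coordinates into a flat block index.
    return sum(c * int(np.prod(shape[i + 1:], dtype=np.int64))
               for i, c in enumerate(coords))


# Worked example: a (4, 8) array sharded (2, 2) over 4 devices, then reduced
# over axis 1 with keep_dims=True, so the output stays sharded only on axis 0.
data = np.arange(32, dtype=np.float32).reshape(4, 8)
blocks = get_parallel_blocks(data, (2, 2))
assert blocks[0].shape == (2, 4)

device_id = 3                             # example rank
coords = id_to_list(device_id, (2, 2))    # -> [1, 1]
coords[1] = 0                             # axis 1 is reduced away
out_id = list_to_id(coords, (2, 1))       # -> 1
print(coords, out_id)

Running the sketch prints [1, 0] 1: rank 3 holds the lower-right input block, and after the reduction over axis 1 its result corresponds to output block 1, which is exactly the slice the deleted grad_cmp/forward_cmp assertions compare against.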
- -import os -import numpy as np -import pytest -from numpy import allclose as allclose_nparray - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class Reshape(Cell): - def __init__(self, target_shape, strategy0=None, strategy1=None): - super(Reshape, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.reshape = P.Reshape(strategy=strategy1) - self.shape = tuple(target_shape) - - def construct(self, input1, input2): - x = self.add(input1, input2) - return self.reshape(x, self.shape) - - -class ReshapeFactory: - def __init__(self, input_shape, target_shape, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.target_shape = target_shape - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [1] * len(target_shape) - out_strategy[0] = strategy1[1][0] - self.out_strategy = out_strategy - - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - self.out_id = device_id % need_dev_num1 - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def forward_reshape_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = Reshape(self.target_shape) - out = net(x, y) - return out.asnumpy() - - def forward_reshape_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) - 
context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_reshape_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = Reshape(self.target_shape) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_reshape_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = Reshape(self.target_shape, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_reshape_cmp(self): - out_mindspore = self.forward_reshape_mindspore_impl() - out_mindspore_parallel = self.forward_reshape_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_reshape_cmp(self): - input_grad_mindspore = self.grad_reshape_mindspore_impl() - input_grad_mindspore_parallel = self.grad_reshape_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -@pytest.mark.reid_forward -def test_reid_reshape_input_128x512x7x7_target_128x25088(): - fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), - strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) - fact.forward_reshape_cmp() - - -def test_reid_reshape_grad_input_128x512x7x7_target_128x25088(): - fact = ReshapeFactory(input_shape=(128, 512, 7, 7), target_shape=(128, 25088), - strategy0=(0, (4, 1, 1, 1), (4, 1, 1, 1)), strategy1=(0, (4, 1, 1, 1))) - fact.grad_reshape_cmp() - - -@pytest.mark.reid_forward -def test_reid_reshape_input_128x64_target_128x64x1x1(): - fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - fact.forward_reshape_cmp() - - -@pytest.mark.reid_grad -def test_reid_reshape_grad_input_128x64_target_128x64x1x1(): - fact = ReshapeFactory(input_shape=(128, 64), target_shape=(128, 64, 1, 1), strategy0=(0, (2, 1), (2, 1)), - strategy1=(0, (2, 1))) - 
fact.grad_reshape_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/reshape/reshape_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/reshape/reshape_parallel_4p.sh deleted file mode 100644 index 9561e9525e4..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/reshape/reshape_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_reshape_parallel_4p.py>../../log/test_reshape_parallel_4p_log$i.log 2>&1 & - cd .. -done diff --git a/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py b/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py deleted file mode 100644 index 6a6fe1a79e8..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/transpose/_test_transpose_parallel_4p.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
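The reshape test deleted above keeps only the leading dimension sharded in its output strategy (out_strategy[0] = strategy1[1][0], every remaining dimension 1), so the parallel result can be validated by splitting the serial output along axis 0. A small NumPy check of that assumption, using the 128x512x7x7 -> 128x25088 shapes from the deleted test (the data values themselves are illustrative):

import numpy as np

full = np.arange(128 * 512 * 7 * 7, dtype=np.float32).reshape(128, 512, 7, 7)
blocks = np.split(full, 4, axis=0)                 # input strategy (4, 1, 1, 1)
reshaped_full = full.reshape(128, 25088)
out_blocks = np.split(reshaped_full, 4, axis=0)    # output strategy (4, 1)

# Each device's reshaped slice matches the corresponding slice of the
# reshaped full tensor, because only the outermost (row-major) axis is sharded.
for dev in range(4):
    np.testing.assert_allclose(blocks[dev].reshape(32, 25088), out_blocks[dev])

If a non-leading axis were sharded instead, reshaping a device's slice would no longer line up with a contiguous slice of the serial result, which is why the deleted test cases restrict strategy1 to splitting the batch dimension only.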
- -import os -import numpy as np -from numpy import allclose as allclose_nparray - -import mindspore.communication.management as distributedTool -from mindspore import context -from mindspore.common.tensor import Tensor -from mindspore.nn import Cell -from mindspore.ops import operations as P -from mindspore.ops.composite import grad_all_with_sens - -device_num = 4 -device_id = int(os.environ["RANK_ID"]) -path = "./output/" - - -def setup_module(): - print("~~~~~~~~~~~set up~~~~~~~~~~~~~") - context.set_context(mode=context.GRAPH_MODE) - context.set_auto_parallel_context(device_num=device_num, global_rank=device_id) - distributedTool.init() - distributedTool.create_group("0-3", [0, 1, 2, 3]) - print("~~~~~~~~~~~set up finished~~~~~~~~~~~~~") - - -def teardown_module(): - print("~~~~~~~~~~~~tear down~~~~~~~~~~") - - -class Net(Cell): - def __init__(self, perm_in, strategy0=None, strategy1=None): - super(Net, self).__init__() - self.add = P.TensorAdd(strategy=strategy0) - self.transpose = P.Transpose(strategy=strategy1) - self.perm_in = perm_in - - def construct(self, x, y): - out = self.add(x, y) - return self.transpose(out, self.perm_in) - - -class Grad(Cell): - def __init__(self, network): - super(Grad, self).__init__() - self.network = network - - def construct(self, x, y, output_grad): - return grad_all_with_sens(self.network)(x, y, output_grad) - - -class TransposeFactory: - def __init__(self, input_shape, perm_in, strategy0, strategy1): - prefix = "" - size = 1 - for s in input_shape: - prefix = prefix + str(s) - size = size * s - self.prefix = prefix - number_range = min(1000, size) - self.input_np1 = np.reshape(np.arange(0, size) % number_range - number_range / 2, input_shape).astype( - np.float32) - self.input_np2 = np.reshape(np.arange(0, size) % number_range - number_range / 4, input_shape).astype( - np.float32) - target_shape = self.input_np1.transpose(perm_in).shape - target_size = 1 - for s in target_shape: - target_size = target_size * s - number_range = min(1000, target_size) - self.target_shape = target_shape - self.output_grad_np = np.reshape(np.arange(0, target_size) % number_range - number_range / 2, - target_shape).astype(np.float32) - self.perm_in = perm_in - self.strategy0 = strategy0 - self.strategy1 = strategy1 - out_strategy = [] - for i in perm_in: - out_strategy.append(strategy1[1][i]) - self.out_strategy = out_strategy - need_dev_num0 = 1 - need_dev_num1 = 1 - for s in strategy0[1]: - need_dev_num0 = need_dev_num0 * s - for s in out_strategy: - need_dev_num1 = need_dev_num1 * s - self.x_id = device_id % need_dev_num0 - self.y_id = device_id % need_dev_num0 - device_index = self.id_to_list(device_id % need_dev_num1, - self.strategy1[1]) # encoding to get the index before transpose - device_index_transpose = [] - for i in perm_in: - device_index_transpose.append(device_index[i]) - self.out_id = self.list_to_id(device_index_transpose, self.out_strategy) - - def get_parallel_blocks(self, input_, strategy): - blocks = [input_] - i = 0 - for stra in strategy: - temp = [] - while len(blocks) > 0: - block = blocks.pop(0) - temp.extend(np.split(block, stra, axis=i)) - blocks.extend(temp) - i += 1 - return blocks - - def id_to_list(self, id_, shape): - result = [] - r = id_ - for i in range(0, len(shape)): - v = 1 - for j in range(i + 1, len(shape)): - v = v * shape[j] - result.append(r // v) - r = r % v - return result - - def list_to_id(self, id_list, shape): - result = 0 - for i in range(0, len(id_list)): - v = 1 - for j in range(i + 1, len(id_list)): - v = v * 
shape[j] - result = result + id_list[i] * v - return result - - def forward_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - net = Net(self.perm_in) - out = net(x, y) - return out.asnumpy() - - def forward_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - net.set_auto_parallel() - out = net(x, y, parallel_inputs_compile=[x, y], parallel_inputs_run=[x1, y1]) - return out.asnumpy() - - def grad_mindspore_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - net = Net(self.perm_in) - grad_net = Grad(net) - grad_net.set_train() - input_grad = grad_net(x, y, output_grad) - return input_grad - - def grad_mindspore_parallel_impl(self): - x = Tensor(self.input_np1) - y = Tensor(self.input_np2) - output_grad = Tensor(self.output_grad_np) - inputs_x = self.get_parallel_blocks(self.input_np1, self.strategy0[1]) - inputs_y = self.get_parallel_blocks(self.input_np2, self.strategy0[1]) - outgrads = self.get_parallel_blocks(self.output_grad_np, self.out_strategy) - x1 = Tensor(inputs_x[self.x_id]) - y1 = Tensor(inputs_y[self.y_id]) - output_grad1 = Tensor(outgrads[self.out_id]) - net = Net(self.perm_in, strategy0=self.strategy0, strategy1=self.strategy1) - grad_net = Grad(net) - context.set_auto_parallel_context(parallel_mode="semi_auto_parallel") - grad_net.set_auto_parallel() - grad_net.set_train() - input_grad = grad_net(x, y, output_grad, parallel_inputs_compile=[x, y, output_grad1], - parallel_inputs_run=[x1, y1, output_grad1]) - return input_grad - - def forward_transpose_cmp(self): - out_mindspore = self.forward_mindspore_impl() - out_mindspore_parallel = self.forward_mindspore_parallel_impl() - out_blocks = self.get_parallel_blocks(out_mindspore, self.out_strategy) - assert np.allclose(out_blocks[self.out_id], out_mindspore_parallel, 0.0001, 0.001) - - def grad_transpose_cmp(self): - input_grad_mindspore = self.grad_mindspore_impl() - input_grad_mindspore_parallel = self.grad_mindspore_parallel_impl() - input_grad_mindspore0 = input_grad_mindspore[0].asnumpy() - input_grad_mindspore1 = input_grad_mindspore[1].asnumpy() - input_grad_mindspore_parallel0 = input_grad_mindspore_parallel[0].asnumpy() - input_grad_mindspore_parallel1 = input_grad_mindspore_parallel[1].asnumpy() - input_grad_blocks_0 = self.get_parallel_blocks(input_grad_mindspore0, self.strategy0[1]) - input_grad_blocks_1 = self.get_parallel_blocks(input_grad_mindspore1, self.strategy0[2]) - assert allclose_nparray(input_grad_blocks_0[self.x_id], input_grad_mindspore_parallel0, 0.0001, 0.0001) - assert allclose_nparray(input_grad_blocks_1[self.y_id], input_grad_mindspore_parallel1, 0.0001, 0.0001) - - -def test_reid_transpose_input_256x512_output_512x256_perm_1x0(): - fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_256x512_output_512x256_perm_1x0(): - fact = TransposeFactory((256, 512), (1, 0), strategy0=(0, (2, 2), (2, 2)), strategy1=(0, (2, 2))) - fact.grad_transpose_cmp() - - -def 
test_reid_transpose_input_512x256_output_256x512_perm_1x0(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (4, 1), (4, 1)), strategy1=(0, (1, 4))) - fact.grad_transpose_cmp() - - -def test_reid_transpose_input_512x256_output_256x512_perm_1x0_repeat(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) - fact.forward_transpose_cmp() - - -def test_reid_grad_transpose_input_512x256_output_256x512_perm_1x0_repeat(): - fact = TransposeFactory((512, 256), (1, 0), strategy0=(0, (2, 1), (2, 1)), strategy1=(0, (2, 1))) - fact.grad_transpose_cmp() diff --git a/tests/ut/python/parallel/parallel_end_to_end/transpose/transpose_parallel_4p.sh b/tests/ut/python/parallel/parallel_end_to_end/transpose/transpose_parallel_4p.sh deleted file mode 100644 index ac4962b8884..00000000000 --- a/tests/ut/python/parallel/parallel_end_to_end/transpose/transpose_parallel_4p.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright 2019 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -for((i=0;i<4;i++)); -do - rm -rf device$i - mkdir device$i - cd device$i - mkdir output - source ../../dist_env_4p.sh $i - env >log$i.log - pytest -s ../test_transpose_parallel_4p.py>../../log/test_transpose_parallel_4p_log$i.log 2>&1 & - cd .. -done
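The transpose test deleted above locates each device's expected output slice by permuting its block coordinates: the output strategy is strategy1 permuted by perm_in, the flat rank is decoded into per-axis coordinates, those coordinates are permuted, and the result is re-encoded into the output block index. A standalone NumPy sketch of that mapping; the 4x4 array, the (2, 2) strategy, and the example rank are illustrative, and np.unravel_index/np.ravel_multi_index stand in for the deleted file's id_to_list/list_to_id helpers:

import numpy as np


def get_parallel_blocks(arr, strategy):
    # Same recursive per-axis split used throughout the deleted tests.
    blocks = [arr]
    for axis, s in enumerate(strategy):
        blocks = [piece for b in blocks for piece in np.split(b, s, axis=axis)]
    return blocks


perm_in = (1, 0)
strategy1 = (2, 2)                                  # shard strategy of the transpose input
out_strategy = tuple(strategy1[p] for p in perm_in)

device_id = 2                                       # example rank
coords = np.unravel_index(device_id, strategy1)     # block coordinates -> (1, 0)
out_id = int(np.ravel_multi_index(tuple(coords[p] for p in perm_in), out_strategy))  # -> 1

# Rank 2's transposed input block must equal block `out_id` of the transposed
# full array, which is what the deleted forward_transpose_cmp asserts per rank.
full = np.arange(16, dtype=np.float32).reshape(4, 4)
in_blocks = get_parallel_blocks(full, strategy1)
out_blocks = get_parallel_blocks(full.transpose(perm_in), out_strategy)
np.testing.assert_allclose(in_blocks[device_id].transpose(perm_in), out_blocks[out_id])

The assertion holds because transposing commutes with the block split: permuting the axes of the full array permutes the block grid and the contents of each block in the same way.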