Add st for dump

2022-04-06 17:33:09 -04:00 · 2022-04-06 17:33:09 -04:00 · 67c98923d5
parent e84f16ac7d
commit 67c98923d5
4 changed files with 242 additions and 31 deletions
--- a/tests/st/dump/dump_test_utils.py
+++ b/tests/st/dump/dump_test_utils.py
@ -1,4 +1,4 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -106,7 +106,7 @@ def generate_dump_json(dump_path, json_file_name, test_key):
    if test_key == "test_async_dump":
        data = async_dump_dict
        data["common_dump_settings"]["path"] = dump_path
-    elif test_key == "test_e2e_dump":
+    elif test_key in ("test_e2e_dump", "test_e2e_dump_trans_false"):
        data = e2e_dump_dict
        data["common_dump_settings"]["path"] = dump_path
    elif test_key == "test_async_dump_net_multi_layer_mode1":
@ -126,6 +126,10 @@ def generate_dump_json(dump_path, json_file_name, test_key):
        data = async_dump_dict
        data["common_dump_settings"]["path"] = dump_path
        data["common_dump_settings"]["file_format"] = "bin"
    elif test_key == "test_e2e_dump_trans_true":
        data = e2e_dump_dict
        data["common_dump_settings"]["path"] = dump_path
        data["e2e_dump_settings"]["trans_flag"] = True
    else:
        raise ValueError(
            "Failed to generate dump json file. The test name value " + test_key + " is invalid.")
--- a/tests/st/dump/test_data_dump.py
+++ b/tests/st/dump/test_data_dump.py
@ -1,4 +1,4 @@
-# Copyright 2020-2021 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -611,6 +611,23 @@ def test_ascend_full_dump():
    run_saved_data_dump_test('test_async_dump', 'full')
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_full_dump_kernel_by_kernel():
    """
    Feature: Ascend Full Dump in kernel-by-kernel (MindRT) mode
    Description: Test Ascend full dump
    Expectation: Tensors are stored in npy files and their statistics stored in statistic.csv
    """
    os.environ['GRAPH_OP_RUN'] = "1"
    try:
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
        run_saved_data_dump_test('test_async_dump', 'full')
    finally:
        # Clear the flag even when the dump check fails, so the setting does not
        # carry over into tests that run later in the same process.
        os.environ.pop('GRAPH_OP_RUN', None)
@constexpr
def construct_tensor(cst):
    """Convert the given constant to a NumPy array and wrap it in a Tensor."""
    as_array = np.array(cst)
    return Tensor(as_array)
--- a/tests/st/dump/test_dump_format.py
+++ b/tests/st/dump/test_dump_format.py
@ -0,0 +1,159 @@
 # Copyright 2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 import os
 import sys
 import tempfile
 import glob
 import shutil
 import pytest
 import numpy as np
 import mindspore as ms
 import mindspore.context as context
 import mindspore.nn as nn
 import mindspore.ops as ops
 from mindspore import Tensor
 from dump_test_utils import generate_dump_json, check_dump_structure
 from tests.security_utils import security_off_wrap
 class ConvNet(nn.Cell):
    """Minimal network made of one Conv2D operator (3 output channels, kernel size 1)."""

    def __init__(self):
        super().__init__()
        self.conv2 = ops.Conv2D(out_channel=3, kernel_size=1)

    def construct(self, x, weight):
        """Apply the Conv2D operator to x using the given weight."""
        conv_out = self.conv2(x, weight)
        return conv_out
 def run_trans_flag(test_name):
    """Run ConvNet once under the given dump config and check the dumped Conv2D output file.

    Args:
        test_name (str): Key handed to generate_dump_json. It also selects the check:
            "test_e2e_dump_trans_true" expects a DefaultFormat file in host format,
            "test_e2e_dump_trans_false" expects an NC1HWC0 file in device format,
            any other key expects an NCHW file in host format.

    Does nothing when the platform is not Linux. MINDSPORE_DUMP_CONFIG is set for
    the duration of the run and is removed again even if one of the checks fails.
    """
    if sys.platform != 'linux':
        return
    # Per-case expectation: file-name pattern of the Conv2D output, its shape,
    # and whether the dumped values must equal the network output.
    if test_name == "test_e2e_dump_trans_true":
        # tensor data in host format.
        output_name = "Conv2D.Conv2D-op*.0.0.*.output.0.DefaultFormat.npy"
        expect_shape = (1, 3, 3, 3)
        compare_values = True
    elif test_name == "test_e2e_dump_trans_false":
        # tensor data in device format.
        output_name = "Conv2D.Conv2D-op*.0.0.*.output.0.NC1HWC0.npy"
        expect_shape = (1, 1, 3, 3, 16)
        compare_values = False
    else:
        # tensor data in host format.
        output_name = "Conv2D.Conv2D-op*.*.*.*.output.0.NCHW.npy"
        expect_shape = (1, 3, 3, 3)
        compare_values = True
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, test_name)
        dump_config_path = os.path.join(tmp_dir, '{}.json'.format(test_name))
        generate_dump_json(dump_path, dump_config_path, test_name)
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        try:
            if os.path.isdir(dump_path):
                shutil.rmtree(dump_path)
            net = ConvNet()
            tensor = Tensor(np.ones([1, 3, 3, 3]), ms.float32)
            weight = Tensor(np.ones([3, 3, 1, 1]), ms.float32)
            expect = net(tensor, weight)
            check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
            dump_data_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
            assert os.path.exists(dump_data_path)
            matched_files = glob.glob(os.path.join(dump_data_path, output_name))
            # Fail with a readable message instead of an IndexError when nothing was dumped.
            assert matched_files, "No dump file matches {} in {}".format(output_name, dump_data_path)
            real_path = os.path.realpath(matched_files[0])
            output = np.load(real_path)
            assert output.shape == expect_shape
            if compare_values:
                assert np.array_equal(output, expect)
        finally:
            # Drop the dump config even when a check above fails, so it does not
            # stay set for whatever runs next in this process.
            os.environ.pop('MINDSPORE_DUMP_CONFIG', None)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_e2e_trans_true():
    """
    Feature: Ascend e2e dump.
    Description: Run e2e dump on Ascend with trans_flag configured to true.
    Expectation: Dump files hold tensor data in host format (4 dimensions).
    """
    dump_case = "test_e2e_dump_trans_true"
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    run_trans_flag(dump_case)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_e2e_trans_false():
    """
    Feature: Ascend e2e dump.
    Description: Run e2e dump on Ascend with trans_flag configured to false.
    Expectation: Dump files hold tensor data in device format.
    """
    dump_case = "test_e2e_dump_trans_false"
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    run_trans_flag(dump_case)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_kernel_by_kernel_trans_true():
    """
    Feature: Ascend kernel by kernel dump.
    Description: Test kernel by kernel dump in Ascend with trans_flag is configured to true.
    Expectation: Dump files has tensor data in host format (4 dimensions).
    """
    os.environ['GRAPH_OP_RUN'] = "1"
    try:
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
        run_trans_flag("test_e2e_dump_trans_true")
    finally:
        # Clear the flag even when the dump check fails, so the setting does not
        # carry over into tests that run later in the same process.
        os.environ.pop('GRAPH_OP_RUN', None)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_kernel_by_kernel_trans_false():
    """
    Feature: Ascend kernel by kernel dump.
    Description: Test kernel by kernel dump in Ascend with trans_flag is configured to false.
    Expectation: Dump files has tensor data in device format.
    """
    os.environ['GRAPH_OP_RUN'] = "1"
    try:
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
        run_trans_flag("test_e2e_dump_trans_false")
    finally:
        # Clear the flag even when the dump check fails, so the setting does not
        # carry over into tests that run later in the same process.
        os.environ.pop('GRAPH_OP_RUN', None)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_a_plus_m_conversion():
    """
    Feature: Ascend A+M dump.
    Description: Run A+M dump on Ascend and check the format of the dumped data.
    Expectation: Dump files hold tensor data in host format (4 dimensions).
    """
    dump_case = "test_async_dump_npy"
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    run_trans_flag(dump_case)
--- a/tests/st/dump/test_multi_root_graph_dump.py
+++ b/tests/st/dump/test_multi_root_graph_dump.py
@ -1,4 +1,4 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -43,8 +43,12 @@ class NewAddNet(Cell):
        super(NewAddNet, self).__init__()
        self.add = P.AddN()
-    def construct(self, x, y):
+    def construct(self, b1, b2, x, y):
        z = self.add([x, y, y])
        if b1 < b2:
            z = self.add([x, y, y])
        else:
            z = self.add([x, x, y])
        return z
@ -52,13 +56,29 @@ def train_addnet(epoch):
    net = AddNet()
    net2 = NewAddNet()
    output_list = []
    b1 = Tensor(np.array(1).astype(np.float32))
    b2 = Tensor(np.array(3).astype(np.float32))
    input_x = Tensor(np.ones([2, 1, 2, 1]).astype(np.float32))
    input_y = Tensor(np.ones([2, 1, 2, 1]).astype(np.float32))
    for _ in range(epoch):
        out_put = net(input_x, input_y)
-        out2 = net2(out_put, input_x)
+        out2 = net2(b1, b2, out_put, input_x)
        output_list.append(out2.asnumpy())
        input_x = input_x + input_y
        b1 = b1+1
    return output_list
 def check_graph_structure(dump_file_path, execution_order_path, graph_id, expect_steps):
    """Check one graph's dumped iteration folders and its global execution-order history.

    Args:
        dump_file_path: Directory that contains one sub-directory per graph id.
        execution_order_path: Directory that contains the execution-order csv files.
        graph_id: Name of the graph's sub-directory; also the suffix of its history file name.
        expect_steps: Expected iteration names as a sorted list of strings.

    Raises:
        AssertionError: If the dumped folders, the history file, or its first column
            do not match expect_steps.
    """
    # The entries dumped for this graph must be exactly the expected steps.
    dumped_steps = os.listdir(os.path.join(dump_file_path, graph_id))
    dumped_steps.sort()
    assert dumped_steps == expect_steps
    history_name = 'ms_global_execution_order_graph_{}.csv'.format(graph_id)
    history_file = os.path.join(execution_order_path, history_name)
    assert path.exists(history_file)
    # The first column of the history file lists the iterations in which the graph ran.
    recorded_steps = []
    with open(history_file) as history:
        for record in csv.reader(history):
            recorded_steps.append(record[0])
    assert recorded_steps == expect_steps
 def run_multi_root_graph_dump(device, dump_mode, test_name):
@ -79,31 +99,24 @@ def run_multi_root_graph_dump(device, dump_mode, test_name):
        for _ in range(3):
            if not os.path.exists(dump_file_path):
                time.sleep(2)
        # Multi root graph script : we have 2 graphs under rank_0 dir
        # Each graph should have 3 iteration
        # Each graph was executed once per epoch,
        # Graph 0 was executed in even iterations, graph one was executed in odd iterations
        assert len(os.listdir(dump_file_path)) == 2
        dump_path_graph_0 = os.path.join(dump_file_path, '0')
        dump_path_graph_1 = os.path.join(dump_file_path, '1')
        assert sorted(os.listdir(dump_path_graph_0)) == ['0', '2', '4']
        assert sorted(os.listdir(dump_path_graph_1)) == ['1', '3', '5']
        execution_order_path = os.path.join(dump_path, 'rank_0', 'execution_order')
-        # Four files in execution_order dir.
+        # Multi root graph script: check dump data dir and graph history files and see if iteration number is matched.
-        # Two files for each graph (ms_execution_order and ms_global_execution_order)
+        if device == "GPU":
-        assert len(os.listdir(execution_order_path)) == 4
+            # In GPU, we have 4 kernel graphs folders under rank_0 dir.
-        global_exec_order_graph_0 = os.path.join(execution_order_path, 'ms_global_execution_order_graph_0.csv')
+            # In graph history dir, there are 2 files for each graph (ms_execution_order and ms_global_execution_order).
-        assert path.exists(global_exec_order_graph_0)
+            assert len(os.listdir(dump_file_path)) == 4
-        with open(global_exec_order_graph_0) as csvfile:
+            assert len(os.listdir(execution_order_path)) == 8
-            history_graph_0 = csv.reader(csvfile)
+            check_graph_structure(dump_file_path, execution_order_path, '0', ['0', '2', '4'])
-            iter_list_graph_0 = list(history_graph_0)
+            check_graph_structure(dump_file_path, execution_order_path, '1', ['1', '3', '5'])
-        assert iter_list_graph_0 == [['0'], ['2'], ['4']]
+        else:
-        global_exec_order_graph_1 = os.path.join(execution_order_path, 'ms_global_execution_order_graph_1.csv')
+            # In Ascend, we have 2 root graphs folders under rank_0 dir.
-        assert path.exists(global_exec_order_graph_1)
+            # In graph history dir, there are 4 ms_execution_order files and 2 ms_global_execution_order files.
-        with open(global_exec_order_graph_1) as csvfile:
+            # Each graph should have 3 iterations. Each graph was executed once per epoch.
-            history_graph_1 = csv.reader(csvfile)
+            # Graph 0 was executed in even iterations, graph 1 was executed in odd iterations.
-            iter_list_graph_1 = list(history_graph_1)
+            assert len(os.listdir(dump_file_path)) == 2
-        assert iter_list_graph_1 == [['1'], ['3'], ['5']]
+            assert len(os.listdir(execution_order_path)) == 6
            check_graph_structure(dump_file_path, execution_order_path, '0', ['0', '2', '4'])
            check_graph_structure(dump_file_path, execution_order_path, '1', ['1', '3', '5'])
@pytest.mark.level0
@ -154,5 +167,23 @@ def test_Ascend_async_multi_root_graph_dump():
    Expectation:
        Dump for two different graphs, graph 0 even iterations and graph 1 odd iterations.
    """
    run_multi_root_graph_dump("Ascend", "async_dump", "test_Ascend_async_multi_root_graph_dump")
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_multi_root_graph_dump_kernel_by_kernel():
    """
    Feature:
        Multi root graph dump for Ascend kernel by kernel.
    Description:
        Test multi root graph dump in Ascend kernel by kernel.
    Expectation:
        Dump for two different graphs, graph 0 even iterations and graph 1 odd iterations.
    """
    os.environ['GRAPH_OP_RUN'] = "1"
    try:
        run_multi_root_graph_dump("Ascend", "e2e_dump", "test_Ascend_e2e_multi_root_graph_dump")
    finally:
        # Clear the flag even when the dump check fails, so the setting does not
        # carry over into tests that run later in the same process.
        os.environ.pop('GRAPH_OP_RUN', None)