Enable cann api callback register function

Add testcases for cann api feature
This commit is contained in:
TinaMengtingZhang 2021-11-25 10:59:21 -05:00
parent 0a3a177b80
commit 16a19be56f
4 changed files with 170 additions and 17 deletions

View File

@ -47,6 +47,7 @@
#ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h"
#include "debug/data_dump/e2e_dump.h"
#include "debug/debugger/debugger_utils.h"
#endif
#include "debug/anf_ir_utils.h"
#include "backend/optimizer/graph_kernel/graph_kernel_optimization.h"
@ -61,6 +62,7 @@
#include "debug/debugger/proto_exporter_stub.h"
#endif
#include "common/util/error_manager/error_manager.h"
#include "toolchain/adx_datadump_callback.h"
#include "toolchain/adx_datadump_server.h"
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
@ -75,6 +77,7 @@
#ifndef ENABLE_SECURITY
#include "profiler/device/ascend/memory_profiling.h"
using Adx::AdxRegDumpProcessCallBack;
using mindspore::device::ascend::ProfilingManager;
using mindspore::profiler::ascend::MemoryProfiling;
#endif
@ -1049,6 +1052,12 @@ void DumpInit(uint32_t device_id) {
json_parser.CopyHcclJsonToDir(device_id);
json_parser.CopyMSCfgJsonToDir(device_id);
if (json_parser.async_dump_enabled()) {
#ifdef ENABLE_D
// register callback to adx
if (json_parser.FileFormatIsNpy()) {
AdxRegDumpProcessCallBack(DumpDataCallBack);
}
#endif
if (AdxDataDumpServerInit() != 0) {
MS_LOG(EXCEPTION) << "Adx data dump server init failed";
}

View File

@ -118,10 +118,14 @@ def generate_dump_json(dump_path, json_file_name, test_key):
elif test_key == "test_Ascend_async_multi_root_graph_dump":
data = async_dump_dict_3
data["common_dump_settings"]["path"] = dump_path
elif test_key == "test_async_dump_file_format":
elif test_key == "test_async_dump_npy":
data = async_dump_dict
data["common_dump_settings"]["path"] = dump_path
data["common_dump_settings"]["file_format"] = "npy"
elif test_key == "test_async_dump_bin":
data = async_dump_dict
data["common_dump_settings"]["path"] = dump_path
data["common_dump_settings"]["file_format"] = "bin"
else:
raise ValueError(
"Failed to generate dump json file. The test name value " + test_key + " is invalid.")
@ -137,6 +141,11 @@ def generate_dump_json_with_overflow(dump_path, json_file_name, test_key, op):
data = async_dump_dict
data["common_dump_settings"]["path"] = dump_path
data["common_dump_settings"]["op_debug_mode"] = op
elif test_key == "test_async_dump_npy":
data = async_dump_dict
data["common_dump_settings"]["path"] = dump_path
data["common_dump_settings"]["op_debug_mode"] = op
data["common_dump_settings"]["file_format"] = "npy"
else:
raise ValueError(
"Failed to generate dump json file. Overflow only support in async dump")

View File

@ -0,0 +1,150 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import sys
import tempfile
import time
import shutil
import glob
import json
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from dump_test_utils import generate_dump_json, generate_dump_json_with_overflow, check_dump_structure
from tests.security_utils import security_off_wrap
class Net(nn.Cell):
    """Small cell that feeds its two inputs through a single P.Add operator."""

    def __init__(self):
        super().__init__()
        self.add = P.Add()

    def construct(self, x_, y_):
        """Apply the Add operator to x_ and y_ and return its output."""
        summed = self.add(x_, y_)
        return summed
# Shared float32 inputs for the async dump tests; x + y is [[8, 10, 12], [14, 16, 18]].
x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
y = np.array([[7, 8, 9], [10, 11, 12]], dtype=np.float32)
def run_async_dump(test_name):
    """Run the Add network with async dump enabled and check the dump output.

    A dump config is generated in a temporary directory and exposed through
    the MINDSPORE_DUMP_CONFIG environment variable while the network runs.
    The variable is always removed afterwards, even when a check fails, so
    that it cannot leak into other tests.

    Args:
        test_name (str): Key forwarded to generate_dump_json to select the
            dump configuration. For "test_async_dump_npy" the dumped output
            tensor is additionally loaded and compared with x + y.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'async_dump')
        dump_config_path = os.path.join(tmp_dir, 'async_dump.json')
        generate_dump_json(dump_path, dump_config_path, test_name)
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        try:
            dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
            if os.path.isdir(dump_path):
                shutil.rmtree(dump_path)
            add = Net()
            add(Tensor(x), Tensor(y))
            # The dump directory may appear with a delay; poll for up to ~6 seconds.
            for _ in range(3):
                if os.path.exists(dump_file_path):
                    break
                time.sleep(2)
            check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
            assert len(os.listdir(dump_file_path)) == 1
            # check content of the generated dump data
            if test_name == "test_async_dump_npy":
                output_name = "Add.Add-op*.*.*.*.output.0.ND.npy"
                matches = glob.glob(os.path.join(dump_file_path, output_name))
                assert matches, "no dump file matching " + output_name + " in " + dump_file_path
                real_path = os.path.realpath(matches[0])
                output = np.load(real_path)
                expect = np.array([[8, 10, 12], [14, 16, 18]], np.float32)
                assert np.array_equal(output, expect)
        finally:
            # Clean up unconditionally so a failed assertion does not leave the
            # dump configuration active for subsequent tests.
            os.environ.pop('MINDSPORE_DUMP_CONFIG', None)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_npy():
    """
    Feature: async dump on Ascend
    Description: run async dump with file_format set to "npy"
    Expectation: dump data is generated in npy file format
    """
    test_key = "test_async_dump_npy"
    run_async_dump(test_key)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_bin():
    """
    Feature: async dump on Ascend in bin format
    Description: test async dump with file_format = "bin"
    Expectation: dump data are generated as protobuf file format (suffix with timestamp)
    """
    run_async_dump("test_async_dump_bin")
def run_overflow_dump(test_name):
    """Run async dump on an overflowing float16 Add and check the dump output.

    Does nothing on non-Linux platforms. Otherwise a dump config is generated
    in a temporary directory (with 3 passed as the op_debug_mode value) and
    exposed through MINDSPORE_DUMP_CONFIG while the network runs. The variable
    is always removed afterwards, even when a check fails.

    Args:
        test_name (str): Key forwarded to generate_dump_json_with_overflow to
            select the dump configuration.
    """
    if sys.platform != 'linux':
        return
    overflow_x = np.array([60000, 60000]).astype(np.float16)
    with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
        dump_path = os.path.join(tmp_dir, 'overflow_dump')
        dump_config_path = os.path.join(tmp_dir, 'overflow_dump.json')
        generate_dump_json_with_overflow(dump_path, dump_config_path, test_name, 3)
        os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
        try:
            if os.path.isdir(dump_path):
                shutil.rmtree(dump_path)
            add = Net()
            add(Tensor(overflow_x), Tensor(overflow_x))
            exe_graph_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
            # The dump directory may appear with a delay; poll for up to ~10 seconds.
            for _ in range(5):
                if os.path.exists(exe_graph_path):
                    break
                time.sleep(2)
            check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
            # check if overflow dump generate exact two files, and the naming format
            assert len(os.listdir(exe_graph_path)) == 2
            output_matches = glob.glob(os.path.join(exe_graph_path, "Add.Add-op*.*.*.*.output.0.ND.npy"))
            overflow_matches = glob.glob(os.path.join(exe_graph_path, "Opdebug.Node_OpDebug.*.*.*.output.0.json"))
            # Check the glob results before indexing so a naming mismatch fails
            # with a readable message rather than an IndexError.
            assert output_matches, "no Add output npy file found in " + exe_graph_path
            assert overflow_matches, "no Opdebug json file found in " + exe_graph_path
            output_path = output_matches[0]
            overflow_path = overflow_matches[0]
            # check content of the output tensor; the overflowing sum is
            # expected to be dumped as 65504, the largest finite float16 value
            real_path = os.path.realpath(output_path)
            output = np.load(real_path)
            expect = np.array([65504, 65504], np.float16)
            assert np.array_equal(output, expect)
            # check content of opdebug info json file
            with open(overflow_path, 'rb') as json_file:
                data = json.load(json_file)
                assert data
        finally:
            # Clean up unconditionally so a failed assertion does not leave the
            # dump configuration active for subsequent tests.
            os.environ.pop('MINDSPORE_DUMP_CONFIG', None)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_overflow_dump():
    """
    Feature: Overflow Dump
    Description: Run the overflow dump scenario in graph mode on Ascend
    Expectation: Overflow occurs, and the overflow dump files are in the correct format
    """
    context.set_context(device_target='Ascend', mode=context.GRAPH_MODE)
    test_key = "test_async_dump_npy"
    run_overflow_dump(test_key)

View File

@ -79,27 +79,12 @@ def run_async_dump(test_name):
def test_async_dump():
"""
Feature: async dump on Ascend
Description: test async dump with default file_format value
Description: test async dump with default file_format value ("bin")
Expectation: dump data are generated as protobuf file format (suffix with timestamp)
"""
run_async_dump("test_async_dump")
@pytest.mark.skip(reason="wait for run package updates in Dec 01")
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_async_dump_file_format():
"""
Feature: async dump on Ascend in npy format
Description: test async dump with file_format is configured as npy
Expectation: dump data are generated as npy file format
"""
run_async_dump("test_async_dump_file_format")
def run_e2e_dump():
if sys.platform != 'linux':
return