fix enable=false still dump issue for cell dump

This commit is contained in:
sabrinasun 2021-11-30 22:10:20 -05:00
parent c4762bf362
commit c095908772
2 changed files with 51 additions and 9 deletions

View File

@ -656,11 +656,10 @@ void DumpJsonParser::GetCellDumpFlag(const session::KernelGraph &kernel_graph) {
for (const auto &kernel : kernel_graph.execution_order()) {
MS_EXCEPTION_IF_NULL(kernel);
auto dump_flag = AnfAlgo::GetDumpFlag(kernel);
if (!dump_flag) {
continue;
if (dump_flag.has_value() && dump_flag.value().compare("true") == 0) {
MS_LOG(DEBUG) << "Dump flag is true for " << GetKernelNodeName(kernel);
cell_dump_kernels_.push_back(GetKernelNodeName(kernel));
}
MS_LOG(INFO) << "Dump flag is true for " << GetKernelNodeName(kernel);
cell_dump_kernels_.push_back(GetKernelNodeName(kernel));
}
}

View File

@ -18,6 +18,8 @@ import tempfile
import time
import shutil
import glob
from enum import Enum
import numpy as np
import pytest
from mindspore import Tensor, set_dump
@ -31,6 +33,10 @@ from mindspore.nn import WithLossCell
from dump_test_utils import generate_cell_dump_json, check_dump_structure
from tests.security_utils import security_off_wrap
class IsDump(Enum):
SET_DUMP_TRUE = 1
SET_DUMP_FALSE = 2
SET_NONE = 3
class ReluReduceMeanDenseRelu(Cell):
def __init__(self, kernel, bias, in_channel, num_class):
@ -51,8 +57,11 @@ def run_multi_layer_train(is_set_dump):
weight = Tensor(np.ones((1000, 2048)).astype(np.float32))
bias = Tensor(np.ones((1000,)).astype(np.float32))
net = ReluReduceMeanDenseRelu(weight, bias, 2048, 1000)
if is_set_dump:
if is_set_dump is IsDump.SET_DUMP_TRUE:
set_dump(net.relu)
elif is_set_dump is IsDump.SET_DUMP_FALSE:
set_dump(net.relu, enabled=False)
set_dump(net.mean)
criterion = SoftmaxCrossEntropyWithLogits(sparse=False)
optimizer = Momentum(learning_rate=0.1, momentum=0.1,
params=filter(lambda x: x.requires_grad, net.get_parameters()))
@ -84,7 +93,7 @@ def test_ascend_cell_dump():
os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
if os.path.isdir(dump_path):
shutil.rmtree(dump_path)
run_multi_layer_train(True)
run_multi_layer_train(IsDump.SET_DUMP_TRUE)
dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
for _ in range(5):
if not os.path.exists(dump_file_path):
@ -121,7 +130,7 @@ def test_ascend_not_cell_dump():
os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
if os.path.isdir(dump_path):
shutil.rmtree(dump_path)
run_multi_layer_train(True)
run_multi_layer_train(IsDump.SET_DUMP_TRUE)
dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
for _ in range(5):
if not os.path.exists(dump_file_path):
@ -153,10 +162,44 @@ def test_ascend_cell_empty_dump():
os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
if os.path.isdir(dump_path):
shutil.rmtree(dump_path)
run_multi_layer_train(False)
run_multi_layer_train(IsDump.SET_NONE)
dump_file_path = os.path.join(dump_path, 'rank_0', 'Net')
time.sleep(5)
# make sure set_dump is ignored and all cell layer are dumped
# make sure no files are dumped
assert not os.path.exists(dump_file_path)
del os.environ['MINDSPORE_DUMP_CONFIG']
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
@security_off_wrap
def test_ascend_cell_dump_set_enable_false():
"""
Feature: Cell Dump
Description: Test cell dump
Expectation: Should ignore set_dump when enabled=False
"""
if sys.platform != 'linux':
return
with tempfile.TemporaryDirectory(dir='/tmp') as tmp_dir:
dump_path = os.path.join(tmp_dir, 'cell_dump')
dump_config_path = os.path.join(tmp_dir, 'cell_dump.json')
generate_cell_dump_json(dump_path, dump_config_path, 'test_async_dump', 2)
os.environ['MINDSPORE_DUMP_CONFIG'] = dump_config_path
if os.path.isdir(dump_path):
shutil.rmtree(dump_path)
run_multi_layer_train(IsDump.SET_DUMP_FALSE)
dump_file_path = os.path.join(dump_path, 'rank_0', 'Net', '0', '0')
for _ in range(5):
if not os.path.exists(dump_file_path):
time.sleep(1)
check_dump_structure(dump_path, dump_config_path, 1, 1, 1)
# make sure directory has dumped files with enabled=True
assert len(os.listdir(dump_file_path)) == 1
mean_file_name = "ReduceMean.Default_network-WithLossCell__backbone-ReluReduceMeanDenseRelu_ReduceMean-*.*.*.*"
mean_file = glob.glob(os.path.join(dump_file_path, mean_file_name))[0]
assert mean_file
del os.environ['MINDSPORE_DUMP_CONFIG']