forked from mindspore-Ecosystem/mindspore
!22468 Add st for error manager
Merge pull request !22468 from tanghuikang/tbe_em
This commit is contained in:
commit
c4fee1dff0
|
@ -305,7 +305,7 @@ std::string AscendKernelCompileManager::FormatSelectResultProcess(const nlohmann
|
|||
}
|
||||
|
||||
void AscendKernelCompileManager::QueryResultProcess(const nlohmann::json &json, TargetJobStatus *task_info,
|
||||
int adjust_log_level = 3) {
|
||||
int adjust_log_level = EXCEPTION) {
|
||||
auto job_type = GetJsonValue<std::string>(json, kJobType);
|
||||
auto json_name = GetJsonValue<std::string>(json, kFusionOpName);
|
||||
MS_LOG(DEBUG) << "Job: " << job_type << " post processing";
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Fake rank table file
|
|
@ -0,0 +1,26 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import os
|
||||
|
||||
from mindspore import context
|
||||
from mindspore.communication.management import init
|
||||
|
||||
def test_hccl_init_fail():
    """Call HCCL ``init()`` with a fake rank table and rank id "2".

    The rank table path is read from the ``FAKE_RANK_TABLE_FILE`` environment
    variable and re-exported as ``RANK_TABLE_FILE``; ``RANK_ID`` is set to
    "2" and the context targets Ascend device 2 before ``init()`` is called.

    NOTE(review): judging by the test name and the runner script, ``init()``
    is presumably expected to raise here -- confirm against the runner.
    """
    rank_table_path = os.environ.get("FAKE_RANK_TABLE_FILE")
    os.environ["RANK_TABLE_FILE"] = rank_table_path
    os.environ["RANK_ID"] = "2"

    context.set_context(device_id=2, device_target="Ascend")
    init()
|
|
@ -0,0 +1,32 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
# Negative test runner: hccl_init_fail.py is expected to FAIL, so this script
# exits 0 when pytest reports a failure and 1 when pytest unexpectedly passes.

# Resolve the directory that contains this script. "$0" is quoted so that a
# path containing spaces does not get word-split.
BASE_PATH=$(
  cd "$(dirname "$0")" && pwd
)
if [ -z "${BASE_PATH}" ]; then
  echo "Cannot resolve the directory of $0" >&2
  exit 1
fi
export FAKE_RANK_TABLE_FILE="${BASE_PATH}/fake_rank_table_file.json"

# Run from the script directory: the pytest target and the log file are given
# as relative paths, and the calling test greps the log next to this script.
cd "${BASE_PATH}" || exit 1

pytest -s -v hccl_init_fail.py > test_hccl_init_fail.log 2>&1 &

process_pid=$!
wait "${process_pid}"

status=$?
# Use the POSIX "=" operator: the caller runs this file with `sh`, and "==" is
# not understood by every `sh` implementation's `[` builtin.
if [ "${status}" = "0" ]; then
  exit 1
fi
exit 0
|
|
@ -0,0 +1,26 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
# Negative test runner: tbe_compile_fail.py is expected to FAIL, so this script
# exits 0 when pytest reports a failure and 1 when pytest unexpectedly passes.

# Run from the directory that contains this script: the pytest target and the
# log file are given as relative paths, and the calling test greps the log
# next to this script.
BASE_PATH=$(
  cd "$(dirname "$0")" && pwd
)
if [ -z "${BASE_PATH}" ]; then
  echo "Cannot resolve the directory of $0" >&2
  exit 1
fi
cd "${BASE_PATH}" || exit 1

pytest -s -v tbe_compile_fail.py > test_tbe_compile_fail.log 2>&1 &

process_pid=$!
wait "${process_pid}"

status=$?
# Use the POSIX "=" operator: the caller runs this file with `sh`, and "==" is
# not understood by every `sh` implementation's `[` builtin.
if [ "${status}" = "0" ]; then
  exit 1
fi
exit 0
|
|
@ -0,0 +1,53 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.common.api import ms_function
|
||||
from mindspore.common.initializer import initializer
|
||||
from mindspore.common.parameter import Parameter
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
context.set_context(device_target="Ascend")
|
||||
|
||||
|
||||
class Net(nn.Cell):
    """Cell wrapping a single ``P.Conv2D`` with a 64x3x7x7 weight parameter.

    The convolution is built with ``stride=99``.
    NOTE(review): the accompanying system test greps the log for a
    "[strideh] must in range [1, 63], actual is [99]" error, so this stride
    looks deliberately invalid -- do not "fix" it without checking that test.
    """

    def __init__(self):
        super().__init__()
        # 64 output channels, 7x7 kernel.
        self.conv = P.Conv2D(
            64,
            7,
            mode=1,
            pad_mode="valid",
            pad=0,
            stride=99,
            dilation=1,
            group=1,
        )
        weight_init = initializer('normal', [64, 3, 7, 7])
        self.w = Parameter(weight_init, name='w')

    @ms_function
    def construct(self, x):
        """Apply the convolution to ``x`` using the cell's weight ``self.w``."""
        return self.conv(x, self.w)
|
||||
|
||||
|
||||
def test_tbe_compile_faile():
    """Run ``Net`` on a random float32 input of shape (32, 3, 224, 224).

    The result is converted with ``asnumpy()`` and printed.
    NOTE(review): judging by the test name, the call is presumably expected
    to fail during kernel compilation -- confirm against the runner script.
    """
    input_array = np.random.randn(32, 3, 224, 224).astype(np.float32)
    net = Net()
    result = net(Tensor(input_array))
    print(result.asnumpy())
|
|
@ -0,0 +1,49 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_hccl_init_fail():
    """Run run_hccl_init_fail.sh and check its log for the expected errors.

    The script (located next to this file) must exit with status 0, and each
    expected message must be found by ``grep`` in test_hccl_init_fail.log.
    """
    case_dir = os.path.dirname(os.path.realpath(__file__))
    assert os.system(f"sh {case_dir}/run_hccl_init_fail.sh") == 0

    log_file = f"{case_dir}/test_hccl_init_fail.log"
    expected_messages = (
        "Ascend error occurred, error message:",
        "EI0004:",
        "Invalid ranktable, with rankID",
    )
    # grep exits non-zero when the pattern is absent, which fails the assert.
    for message in expected_messages:
        grep_ret = os.system(f"grep '{message}' {log_file} -c")
        assert grep_ret == 0
|
||||
|
||||
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_tbe_compile_fail():
    """Run run_tbe_compile_fail.sh and check its log for the expected errors.

    The script (located next to this file) must exit with status 0, and each
    expected message must be found by ``grep`` in test_tbe_compile_fail.log.
    """
    case_dir = os.path.dirname(os.path.realpath(__file__))
    assert os.system(f"sh {case_dir}/run_tbe_compile_fail.sh") == 0

    log_file = f"{case_dir}/test_tbe_compile_fail.log"
    expected_patterns = (
        "Ascend error occurred, error message:",
        "E60011:",
        # Brackets are escaped so grep treats them literally.
        r"In op\[conv2d\], the \[strideh\] must in range \[1, 63\], actual is \[99\]",
    )
    # grep exits non-zero when the pattern is absent, which fails the assert.
    for pattern in expected_patterns:
        grep_ret = os.system(f"grep '{pattern}' {log_file} -c")
        assert grep_ret == 0
|
Loading…
Reference in New Issue