forked from mindspore-Ecosystem/mindspore
Add st for error manager
This commit is contained in:
parent
f9e26895c2
commit
be0e94626a
|
@ -305,7 +305,7 @@ std::string AscendKernelCompileManager::FormatSelectResultProcess(const nlohmann
|
||||||
}
|
}
|
||||||
|
|
||||||
void AscendKernelCompileManager::QueryResultProcess(const nlohmann::json &json, TargetJobStatus *task_info,
|
void AscendKernelCompileManager::QueryResultProcess(const nlohmann::json &json, TargetJobStatus *task_info,
|
||||||
int adjust_log_level = 3) {
|
int adjust_log_level = EXCEPTION) {
|
||||||
auto job_type = GetJsonValue<std::string>(json, kJobType);
|
auto job_type = GetJsonValue<std::string>(json, kJobType);
|
||||||
auto json_name = GetJsonValue<std::string>(json, kFusionOpName);
|
auto json_name = GetJsonValue<std::string>(json, kFusionOpName);
|
||||||
MS_LOG(DEBUG) << "Job: " << job_type << " post processing";
|
MS_LOG(DEBUG) << "Job: " << job_type << " post processing";
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Fake rank table file
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
import os
|
||||||
|
|
||||||
|
from mindspore import context
|
||||||
|
from mindspore.communication.management import init
|
||||||
|
|
||||||
|
def test_hccl_init_fail():
    """Initialize HCCL with a fake rank table so that initialization fails.

    The rank table path is read from the ``FAKE_RANK_TABLE_FILE`` environment
    variable and re-exported as ``RANK_TABLE_FILE`` together with
    ``RANK_ID=2``; the context is then bound to Ascend device 2 and
    ``init()`` is called.

    Raises:
        RuntimeError: If ``FAKE_RANK_TABLE_FILE`` is not set.
    """
    fake_rank_table_file = os.getenv("FAKE_RANK_TABLE_FILE")
    if fake_rank_table_file is None:
        # Storing None in os.environ raises an opaque "str expected" TypeError;
        # fail with a message that names the missing variable instead.
        raise RuntimeError(
            "Environment variable FAKE_RANK_TABLE_FILE must point to the fake "
            "rank table file before running this case.")
    os.environ["RANK_TABLE_FILE"] = fake_rank_table_file
    os.environ["RANK_ID"] = "2"

    context.set_context(device_id=2, device_target="Ascend")
    init()
|
|
@ -0,0 +1,32 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Resolve the directory that holds this script so the case does not depend on
# the caller's working directory.
BASE_PATH=$(cd "$(dirname "$0")" && pwd) || exit 1
export FAKE_RANK_TABLE_FILE="${BASE_PATH}/fake_rank_table_file.json"

# The Python case is addressed by a relative name and the log is written with a
# relative name, so run from the script directory to keep both next to it.
cd "${BASE_PATH}" || exit 1

pytest -s -v hccl_init_fail.py > test_hccl_init_fail.log 2>&1 &
process_pid=$!
wait "${process_pid}"
status=$?

# The case is expected to fail: a successful pytest run is reported as an
# error (exit 1), a failing run as success (exit 0).
# Use the POSIX numeric test; `==` inside `[ ]` is a bash extension and this
# script may be launched through plain `sh`.
if [ "${status}" -eq 0 ]; then
  exit 1
fi
exit 0
|
|
@ -0,0 +1,26 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# The Python case is addressed by a relative name and the log is written with a
# relative name, so run from the directory that holds this script regardless of
# where the caller started it.
cd "$(dirname "$0")" || exit 1

pytest -s -v tbe_compile_fail.py > test_tbe_compile_fail.log 2>&1 &
process_pid=$!
wait "${process_pid}"
status=$?

# The case is expected to fail: a successful pytest run is reported as an
# error (exit 1), a failing run as success (exit 0).
# Use the POSIX numeric test; `==` inside `[ ]` is a bash extension and this
# script may be launched through plain `sh`.
if [ "${status}" -eq 0 ]; then
  exit 1
fi
exit 0
|
|
@ -0,0 +1,53 @@
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import mindspore.context as context
|
||||||
|
import mindspore.nn as nn
|
||||||
|
from mindspore import Tensor
|
||||||
|
from mindspore.common.api import ms_function
|
||||||
|
from mindspore.common.initializer import initializer
|
||||||
|
from mindspore.common.parameter import Parameter
|
||||||
|
from mindspore.ops import operations as P
|
||||||
|
|
||||||
|
# Select Ascend as the device target for everything defined in this module.
context.set_context(device_target="Ascend")
|
||||||
|
|
||||||
|
|
||||||
|
class Net(nn.Cell):
    """Cell wrapping a single ``Conv2D`` primitive and its weight parameter.

    The convolution is built with ``stride=99``; the accompanying system test
    looks for the compiler message stating that the stride must be in
    ``[1, 63]``, so the value is intentionally out of range.
    """

    def __init__(self):
        super().__init__()
        channels_out = 64
        kernel = 7
        conv_options = dict(mode=1,
                            pad_mode="valid",
                            pad=0,
                            stride=99,
                            dilation=1,
                            group=1)
        self.conv = P.Conv2D(channels_out, kernel, **conv_options)
        # Normally-initialized weight with the literal shape [64, 3, 7, 7].
        weight_init = initializer('normal', [64, 3, 7, 7])
        self.w = Parameter(weight_init, name='w')

    @ms_function
    def construct(self, x):
        """Apply the convolution to ``x`` using the cell's weight ``w``."""
        return self.conv(x, self.w)
|
||||||
|
|
||||||
|
|
||||||
|
def test_tbe_compile_faile():
    """Run ``Net`` on a random float32 batch and print the result.

    The input is drawn from a standard normal distribution with shape
    (32, 3, 224, 224), wrapped in a ``Tensor`` and fed through the network.
    """
    input_shape = (32, 3, 224, 224)
    batch = np.random.randn(*input_shape).astype(np.float32)
    net = Net()
    result = net(Tensor(batch))
    print(result.asnumpy())
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_hccl_init_fail():
    """Check the error report produced when HCCL initialization fails.

    Runs ``run_hccl_init_fail.sh`` from the directory of this file, expects it
    to exit with 0, and then verifies that ``test_hccl_init_fail.log`` in the
    same directory contains the expected Ascend error text.
    """
    # Imported locally so this test does not depend on a module-level import.
    import subprocess

    sh_path = os.path.split(os.path.realpath(__file__))[0]
    # The runner has a bash shebang, so start it with bash rather than `sh`,
    # and run it from its own directory because it addresses the Python case
    # and the log file by relative name. An argument list avoids shell quoting
    # problems when the path contains spaces.
    script = os.path.join(sh_path, "run_hccl_init_fail.sh")
    ret = subprocess.run(["bash", script], cwd=sh_path, check=False).returncode
    assert ret == 0

    log_file = os.path.join(sh_path, "test_hccl_init_fail.log")
    with open(log_file, encoding="utf-8", errors="replace") as log:
        log_text = log.read()
    expected_messages = (
        "Ascend error occurred, error message:",
        "EI0004:",
        "Invalid ranktable, with rankID",
    )
    for message in expected_messages:
        assert message in log_text, f"'{message}' not found in {log_file}"
|
||||||
|
|
||||||
|
@pytest.mark.level1
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_single
def test_tbe_compile_fail():
    """Check the error report produced when TBE operator compilation fails.

    Runs ``run_tbe_compile_fail.sh`` from the directory of this file, expects
    it to exit with 0, and then verifies that ``test_tbe_compile_fail.log`` in
    the same directory contains the expected Ascend error text.
    """
    # Imported locally so this test does not depend on a module-level import.
    import subprocess

    sh_path = os.path.split(os.path.realpath(__file__))[0]
    # The runner has a bash shebang, so start it with bash rather than `sh`,
    # and run it from its own directory because it addresses the Python case
    # and the log file by relative name. An argument list avoids shell quoting
    # problems when the path contains spaces.
    script = os.path.join(sh_path, "run_tbe_compile_fail.sh")
    ret = subprocess.run(["bash", script], cwd=sh_path, check=False).returncode
    assert ret == 0

    log_file = os.path.join(sh_path, "test_tbe_compile_fail.log")
    with open(log_file, encoding="utf-8", errors="replace") as log:
        log_text = log.read()
    expected_messages = (
        "Ascend error occurred, error message:",
        "E60011:",
        "In op[conv2d], the [strideh] must in range [1, 63], actual is [99]",
    )
    for message in expected_messages:
        assert message in log_text, f"'{message}' not found in {log_file}"
|
Loading…
Reference in New Issue