From b8373ff412d39d8df3c96313b7e2da780d87d343 Mon Sep 17 00:00:00 2001 From: liu-yongqi-63 Date: Thu, 9 Feb 2023 19:25:22 +0800 Subject: [PATCH] Fixed errors reported in the test_gpu_reset.py and test_gpu_lenet.py use cases --- .../minddata/mindrecord/io/shard_reader.cc | 11 ++++--- .../dataset/core/validator_helpers.py | 4 +-- mindspore/python/mindspore/train/model.py | 2 +- tests/st/dataset/test_gpu_lenet.py | 5 ---- tests/st/dataset/test_gpu_reset.py | 16 +++++++--- .../st/dataset/test_pynative_lenet_gpu_2p.py | 29 +++++++++++++++++++ 6 files changed, 49 insertions(+), 18 deletions(-) create mode 100644 tests/st/dataset/test_pynative_lenet_gpu_2p.py diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc index 787b2f7e6be..42a1cf8a3a4 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc @@ -111,12 +111,11 @@ Status ShardReader::Init(const std::vector &file_paths, bool load_d if (num_rows_ > LAZY_LOAD_THRESHOLD) { lazy_load_ = true; tasks_.lazy_load_ = true; - MS_LOG(WARNING) - << "The number of samples is larger than " << LAZY_LOAD_THRESHOLD - << ", enable lazy load mode. If you want to speed up data loading, " - << "it is recommended that you save multiple samples into one record when creating MindRecord files," - << " so that you can enable fast loading mode, and don't forget to adjust your batch size " - << "according to the current samples."; + MS_LOG(INFO) << "The number of samples is larger than " << LAZY_LOAD_THRESHOLD + << ", enable lazy load mode. 
If you want to speed up data loading, " + << "it is recommended that you save multiple samples into one record when creating MindRecord files," + << " so that you can enable fast loading mode, and don't forget to adjust your batch size " + << "according to the current samples."; } auto disk_size = page_size_ * row_group_summary.size(); diff --git a/mindspore/python/mindspore/dataset/core/validator_helpers.py b/mindspore/python/mindspore/dataset/core/validator_helpers.py index a080a6f66c0..399b46fb6c9 100644 --- a/mindspore/python/mindspore/dataset/core/validator_helpers.py +++ b/mindspore/python/mindspore/dataset/core/validator_helpers.py @@ -746,9 +746,9 @@ def check_gnn_list_or_ndarray(param, param_name, data_type=int): elif data_type == str: data_type = np.str_ - if param.dtype.type != data_type: + if param.dtype != data_type: raise TypeError("Each member in {0} should be of type {1}. Got {2}.".format( - param_name, data_type, param.dtype.type)) + param_name, data_type, param.dtype)) def check_tensor_op(param, param_name): diff --git a/mindspore/python/mindspore/train/model.py b/mindspore/python/mindspore/train/model.py index 697585df701..96c99445b10 100644 --- a/mindspore/python/mindspore/train/model.py +++ b/mindspore/python/mindspore/train/model.py @@ -431,7 +431,7 @@ class Model: if dataset_sink_mode: network = connect_network_with_dataset(network, dataset_helper) - if self.enable_recovery and is_train: + if _get_recovery_context("enable_recovery") and is_train: _set_training_dataset(dataset_helper) diff --git a/tests/st/dataset/test_gpu_lenet.py b/tests/st/dataset/test_gpu_lenet.py index b43ba89266e..151a2c8b642 100644 --- a/tests/st/dataset/test_gpu_lenet.py +++ b/tests/st/dataset/test_gpu_lenet.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -import pytest - import mindspore as ms import mindspore.dataset as ds import mindspore.nn as nn @@ -135,9 +133,6 @@ def set_parameter(): all_reduce_fusion_config=config.all_reduce_fusion_config) -@pytest.mark.level1 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard def test_train_net_fade_then_sink(): """ Feature: The PYNATIVE mode under GPU has been trained for two consecutive times diff --git a/tests/st/dataset/test_gpu_reset.py b/tests/st/dataset/test_gpu_reset.py index cd93204f66f..ea05024d7c3 100644 --- a/tests/st/dataset/test_gpu_reset.py +++ b/tests/st/dataset/test_gpu_reset.py @@ -14,12 +14,14 @@ # ============================================================================ import pytest -from mindspore.ops import operations as P -import mindspore.nn as nn -from mindspore.train import Model, Callback -from mindspore.common import set_seed import mindspore.dataset as ds +import mindspore.nn as nn +from mindspore import context from mindspore import log as logger +from mindspore.common import set_seed +from mindspore.ops import operations as P +from mindspore.parallel._recovery_context import _get_recovery_context_func_map +from mindspore.train import Model, Callback set_seed(1) @@ -84,6 +86,12 @@ def test_dataset_reset_sink(fast_recovery, num_parallel_workers, python_multipro Description: Test Dataset recovery when GPU (and sink mode) is used. 
Expectation: Training completes successfully """ + def enable_recovery(): + """Get whether enable recovery""" + return True + + context.set_context(mode=context.GRAPH_MODE) + _get_recovery_context_func_map["enable_recovery"] = enable_recovery original_fast_recovery = ds.config.get_fast_recovery() ds.config.set_fast_recovery(fast_recovery) data = create_np_dataset(20, num_parallel_workers, python_multiprocessing) diff --git a/tests/st/dataset/test_pynative_lenet_gpu_2p.py b/tests/st/dataset/test_pynative_lenet_gpu_2p.py new file mode 100644 index 00000000000..97373328e45 --- /dev/null +++ b/tests/st/dataset/test_pynative_lenet_gpu_2p.py @@ -0,0 +1,29 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import os +import pytest + + +@pytest.mark.level1 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_single +def test_pynative_lenet_gpu_2p_mpi(): + """ + Feature: PyNative LeNet 2P + Description: test PyNative LeNet 2P with mpirun + Expectation: success, return_code==0 + """ + return_code = os.system("mpirun -n 2 pytest -s test_gpu_lenet.py::test_train_net_fade_then_sink") + assert return_code == 0