!48649 Fixed errors reported in the test_gpu_reset.py and test_gpu_lenet.py test cases

Merge pull request !48649 from 刘勇琪/master
2023-02-13 03:16:41 +00:00 · 2023-02-13 03:16:41 +00:00 · a1de8e147a
parent 92dd278d21 b8373ff412
commit a1de8e147a
6 changed files with 49 additions and 18 deletions
--- a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc
+++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc
@ -111,12 +111,11 @@ Status ShardReader::Init(const std::vector<std::string> &file_paths, bool load_d
  if (num_rows_ > LAZY_LOAD_THRESHOLD) {
    lazy_load_ = true;
    tasks_.lazy_load_ = true;
-    MS_LOG(WARNING)
-      << "The number of samples is larger than " << LAZY_LOAD_THRESHOLD
-      << ", enable lazy load mode. If you want to speed up data loading, "
-      << "it is recommended that you save multiple samples into one record when creating MindRecord files,"
-      << " so that you can enable fast loading mode, and don't forget to adjust your batch size "
-      << "according to the current samples.";
+    MS_LOG(INFO) << "The number of samples is larger than " << LAZY_LOAD_THRESHOLD
+                 << ", enable lazy load mode. If you want to speed up data loading, "
+                 << "it is recommended that you save multiple samples into one record when creating MindRecord files,"
+                 << " so that you can enable fast loading mode, and don't forget to adjust your batch size "
+                 << "according to the current samples.";
  }

  auto disk_size = page_size_ * row_group_summary.size();
--- a/mindspore/python/mindspore/dataset/core/validator_helpers.py
+++ b/mindspore/python/mindspore/dataset/core/validator_helpers.py
@ -746,9 +746,9 @@ def check_gnn_list_or_ndarray(param, param_name, data_type=int):
        elif data_type == str:
            data_type = np.str_

-        if param.dtype.type != data_type:
+        if param.dtype != data_type:
            raise TypeError("Each member in {0} should be of type {1}. Got {2}.".format(
-                param_name, data_type, param.dtype.type))
+                param_name, data_type, param.dtype))


 def check_tensor_op(param, param_name):
--- a/mindspore/python/mindspore/train/model.py
+++ b/mindspore/python/mindspore/train/model.py
@ -431,7 +431,7 @@ class Model:
        if dataset_sink_mode:
            network = connect_network_with_dataset(network, dataset_helper)

-        if self.enable_recovery and is_train:
+        if _get_recovery_context("enable_recovery") and is_train:
            _set_training_dataset(dataset_helper)


--- a/tests/st/dataset/test_gpu_lenet.py
+++ b/tests/st/dataset/test_gpu_lenet.py
@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-import pytest
-
 import mindspore as ms
 import mindspore.dataset as ds
 import mindspore.nn as nn
@ -135,9 +133,6 @@ def set_parameter():
                                 all_reduce_fusion_config=config.all_reduce_fusion_config)


-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.env_onecard
 def test_train_net_fade_then_sink():
    """
    Feature: The PYNATIVE mode under GPU has been trained for two consecutive times
--- a/tests/st/dataset/test_gpu_reset.py
+++ b/tests/st/dataset/test_gpu_reset.py
@ -14,12 +14,14 @@
 # ============================================================================
 import pytest

-from mindspore.ops import operations as P
-import mindspore.nn as nn
-from mindspore.train import Model, Callback
-from mindspore.common import set_seed
 import mindspore.dataset as ds
+import mindspore.nn as nn
+from mindspore import context
 from mindspore import log as logger
+from mindspore.common import set_seed
+from mindspore.ops import operations as P
+from mindspore.parallel._recovery_context import _get_recovery_context_func_map
+from mindspore.train import Model, Callback

 set_seed(1)

@ -84,6 +86,12 @@ def test_dataset_reset_sink(fast_recovery, num_parallel_workers, python_multipro
    Description: Test Dataset recovery when GPU (and sink mode) is used.
    Expectation: Training completes successfully
    """
+    def enable_recovery():
+        """Get whether enable recovery"""
+        return True
+
+    context.set_context(mode=context.GRAPH_MODE)
+    _get_recovery_context_func_map["enable_recovery"] = enable_recovery
    original_fast_recovery = ds.config.get_fast_recovery()
    ds.config.set_fast_recovery(fast_recovery)
    data = create_np_dataset(20, num_parallel_workers, python_multiprocessing)
--- a/tests/st/dataset/test_pynative_lenet_gpu_2p.py
+++ b/tests/st/dataset/test_pynative_lenet_gpu_2p.py
@ -0,0 +1,29 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import pytest
+
+
+@pytest.mark.level1
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_single
+def test_pynative_lenet_gpu_2p_mpi():
+    """
+    Feature: PyNative LeNet 2P
+    Description: Run test_gpu_lenet.py::test_train_net_fade_then_sink in 2 processes via mpirun
+    Expectation: The mpirun command launched through os.system returns 0
+    """
+    return_code = os.system("mpirun -n 2 pytest -s test_gpu_lenet.py::test_train_net_fade_then_sink")
+    assert return_code == 0