!48649 Fixed errors reported in the test_gpu_retest.py and test_gpu_lenet.py use cases

Merge pull request !48649 from 刘勇琪/master
This commit is contained in:
i-robot 2023-02-13 03:16:41 +00:00 committed by Gitee
commit a1de8e147a
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
6 changed files with 49 additions and 18 deletions

View File

@ -111,12 +111,11 @@ Status ShardReader::Init(const std::vector<std::string> &file_paths, bool load_d
if (num_rows_ > LAZY_LOAD_THRESHOLD) {
lazy_load_ = true;
tasks_.lazy_load_ = true;
MS_LOG(WARNING)
<< "The number of samples is larger than " << LAZY_LOAD_THRESHOLD
<< ", enable lazy load mode. If you want to speed up data loading, "
<< "it is recommended that you save multiple samples into one record when creating MindRecord files,"
<< " so that you can enable fast loading mode, and don't forget to adjust your batch size "
<< "according to the current samples.";
MS_LOG(INFO) << "The number of samples is larger than " << LAZY_LOAD_THRESHOLD
<< ", enable lazy load mode. If you want to speed up data loading, "
<< "it is recommended that you save multiple samples into one record when creating MindRecord files,"
<< " so that you can enable fast loading mode, and don't forget to adjust your batch size "
<< "according to the current samples.";
}
auto disk_size = page_size_ * row_group_summary.size();

View File

@ -746,9 +746,9 @@ def check_gnn_list_or_ndarray(param, param_name, data_type=int):
elif data_type == str:
data_type = np.str_
if param.dtype.type != data_type:
if param.dtype != data_type:
raise TypeError("Each member in {0} should be of type {1}. Got {2}.".format(
param_name, data_type, param.dtype.type))
param_name, data_type, param.dtype))
def check_tensor_op(param, param_name):

View File

@ -431,7 +431,7 @@ class Model:
if dataset_sink_mode:
network = connect_network_with_dataset(network, dataset_helper)
if self.enable_recovery and is_train:
if _get_recovery_context("enable_recovery") and is_train:
_set_training_dataset(dataset_helper)

View File

@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import pytest
import mindspore as ms
import mindspore.dataset as ds
import mindspore.nn as nn
@ -135,9 +133,6 @@ def set_parameter():
all_reduce_fusion_config=config.all_reduce_fusion_config)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_train_net_fade_then_sink():
"""
Feature: The PYNATIVE mode under GPU has been trained for two consecutive times

View File

@ -14,12 +14,14 @@
# ============================================================================
import pytest
from mindspore.ops import operations as P
import mindspore.nn as nn
from mindspore.train import Model, Callback
from mindspore.common import set_seed
import mindspore.dataset as ds
import mindspore.nn as nn
from mindspore import context
from mindspore import log as logger
from mindspore.common import set_seed
from mindspore.ops import operations as P
from mindspore.parallel._recovery_context import _get_recovery_context_func_map
from mindspore.train import Model, Callback
set_seed(1)
@ -84,6 +86,12 @@ def test_dataset_reset_sink(fast_recovery, num_parallel_workers, python_multipro
Description: Test Dataset recovery when GPU (and sink mode) is used.
Expectation: Training completes successfully
"""
def enable_recovery():
"""Get whether enable recovery"""
return True
context.set_context(mode=context.GRAPH_MODE)
_get_recovery_context_func_map["enable_recovery"] = enable_recovery
original_fast_recovery = ds.config.get_fast_recovery()
ds.config.set_fast_recovery(fast_recovery)
data = create_np_dataset(20, num_parallel_workers, python_multiprocessing)

View File

@ -0,0 +1,29 @@
# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import os
import pytest
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_single
def test_pynative_lenet_gpu_2p_mpi():
    """
    Feature: PyNative LeNet 2P
    Description: test PyNative LeNet 2P with mpirun
    Expectation: success, return_code==0
    """
    # Imported locally so the block stays self-contained regardless of the
    # module-level import list.
    import subprocess

    # Pass an argument list instead of a shell string: no shell is spawned,
    # and `returncode` is the child's plain exit status rather than the
    # platform-dependent encoded value returned by os.system().
    # The test file path is relative, so it is resolved against the current
    # working directory of the pytest process that runs this test.
    command = [
        "mpirun", "-n", "2",
        "pytest", "-s", "test_gpu_lenet.py::test_train_net_fade_then_sink",
    ]
    return_code = subprocess.run(command, check=False).returncode
    assert return_code == 0, f"mpirun exited with status {return_code}"