!48649 Fixed errors reported in the test_gpu_retest.py and test_gpu_lenet.py use cases
Merge pull request !48649 from 刘勇琪/master
commit a1de8e147a
@@ -111,12 +111,11 @@ Status ShardReader::Init(const std::vector<std::string> &file_paths, bool load_d
   if (num_rows_ > LAZY_LOAD_THRESHOLD) {
     lazy_load_ = true;
     tasks_.lazy_load_ = true;
-    MS_LOG(WARNING)
-      << "The number of samples is larger than " << LAZY_LOAD_THRESHOLD
-      << ", enable lazy load mode. If you want to speed up data loading, "
-      << "it is recommended that you save multiple samples into one record when creating MindRecord files,"
-      << " so that you can enable fast loading mode, and don't forget to adjust your batch size "
-      << "according to the current samples.";
+    MS_LOG(INFO) << "The number of samples is larger than " << LAZY_LOAD_THRESHOLD
+                 << ", enable lazy load mode. If you want to speed up data loading, "
+                 << "it is recommended that you save multiple samples into one record when creating MindRecord files,"
+                 << " so that you can enable fast loading mode, and don't forget to adjust your batch size "
+                 << "according to the current samples.";
   }

   auto disk_size = page_size_ * row_group_summary.size();
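The only change in this hunk is the log level: the lazy-load hint is informational, not a warning. For readers unfamiliar with the behaviour, here is a minimal Python sketch of the same threshold decision (not MindSpore code; the threshold value is hypothetical and the message text is taken loosely from the hunk above):

import logging

LAZY_LOAD_THRESHOLD = 5000000  # hypothetical value; the real constant lives in the C++ sources

def choose_load_mode(num_rows, logger=logging.getLogger("mindrecord")):
    """Mirror of the ShardReader logic: switch to lazy load above the threshold."""
    lazy_load = False
    if num_rows > LAZY_LOAD_THRESHOLD:
        lazy_load = True
        # After this commit the hint is logged at INFO instead of WARNING,
        # since lazy load is expected behaviour, not an error condition.
        logger.info("The number of samples is larger than %d, enable lazy load mode. "
                    "If you want to speed up data loading, it is recommended that you save "
                    "multiple samples into one record when creating MindRecord files.",
                    LAZY_LOAD_THRESHOLD)
    return lazy_load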
@@ -746,9 +746,9 @@ def check_gnn_list_or_ndarray(param, param_name, data_type=int):
         elif data_type == str:
             data_type = np.str_

-        if param.dtype.type != data_type:
+        if param.dtype != data_type:
             raise TypeError("Each member in {0} should be of type {1}. Got {2}.".format(
-                param_name, data_type, param.dtype.type))
+                param_name, data_type, param.dtype))


 def check_tensor_op(param, param_name):
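Background on the dtype check: NumPy's dtype comparison accepts scalar types such as np.int64 directly, so for numeric types `param.dtype != data_type` behaves like the old `param.dtype.type != data_type`, and printing `param.dtype` gives the friendlier `int64` instead of `<class 'numpy.int64'>` in the error message. A standalone sketch of the simplified check (plain NumPy; function and parameter names are illustrative, not the real validator):

import numpy as np

def check_ndarray_dtype(param, param_name, data_type=np.int64):
    """Simplified version of the validator: compare the array dtype directly."""
    if param.dtype != data_type:  # np.dtype.__eq__ accepts scalar types such as np.int64
        raise TypeError("Each member in {0} should be of type {1}. Got {2}.".format(
            param_name, data_type, param.dtype))

check_ndarray_dtype(np.array([1, 2, 3], dtype=np.int64), "neighbors")   # passes
# check_ndarray_dtype(np.array([1.0, 2.0]), "neighbors")                # raises TypeError: ... Got float64.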
@@ -431,7 +431,7 @@ class Model:
         if dataset_sink_mode:
             network = connect_network_with_dataset(network, dataset_helper)

-        if self.enable_recovery and is_train:
+        if _get_recovery_context("enable_recovery") and is_train:
            _set_training_dataset(dataset_helper)

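The new condition consults the global recovery context instead of a per-Model attribute. `_get_recovery_context` is backed by a registry of getter functions, the same `_get_recovery_context_func_map` that the test further below overrides. A minimal sketch of that lookup pattern (only the dictionary-lookup idea comes from the diff; the default entry and error handling are assumptions for illustration):

# Hypothetical stand-in for mindspore.parallel._recovery_context.
_get_recovery_context_func_map = {
    "enable_recovery": lambda: False,  # default: recovery disabled
}

def _get_recovery_context(attr_key):
    """Look up a recovery attribute through its registered getter."""
    if attr_key not in _get_recovery_context_func_map:
        raise ValueError("Unknown recovery context attribute: {}".format(attr_key))
    return _get_recovery_context_func_map[attr_key]()

# Call-site shape, as in the hunk above: consult the framework-wide recovery
# state rather than a flag stored on the Model instance.
if _get_recovery_context("enable_recovery"):
    print("recovery enabled, binding the training dataset for reset/replay")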
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-import pytest
-
 import mindspore as ms
 import mindspore.dataset as ds
 import mindspore.nn as nn
@@ -135,9 +133,6 @@ def set_parameter():
                                           all_reduce_fusion_config=config.all_reduce_fusion_config)


-@pytest.mark.level1
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.env_onecard
 def test_train_net_fade_then_sink():
     """
     Feature: The PYNATIVE mode under GPU has been trained for two consecutive times
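The three marks removed above are what the CI uses to select this case; after this commit it is exercised indirectly through the mpirun wrapper added at the end of this change set. As a generic reminder of how marker-based selection works (standard pytest behaviour; the marker names are the project's own, the test body is a placeholder):

import pytest

@pytest.mark.level1                      # custom markers, declared in the project's pytest config
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_example():
    assert 1 + 1 == 2

# Marker-filtered runs select by expression, e.g.:
#   pytest -m "level1 and platform_x86_gpu_training" test_example.py
# Tests that do not match the expression are deselected, which is why dropping
# the markers removes a test from a marker-filtered CI pass.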
@@ -14,12 +14,14 @@
 # ============================================================================
 import pytest

-from mindspore.ops import operations as P
-import mindspore.nn as nn
-from mindspore.train import Model, Callback
-from mindspore.common import set_seed
 import mindspore.dataset as ds
+import mindspore.nn as nn
 from mindspore import context
+from mindspore import log as logger
+from mindspore.common import set_seed
+from mindspore.ops import operations as P
+from mindspore.parallel._recovery_context import _get_recovery_context_func_map
+from mindspore.train import Model, Callback

 set_seed(1)

@@ -84,6 +86,12 @@ def test_dataset_reset_sink(fast_recovery, num_parallel_workers, python_multipro
     Description: Test Dataset recovery when GPU (and sink mode) is used.
     Expectation: Training completes successfully
     """
+    def enable_recovery():
+        """Get whether enable recovery"""
+        return True
+
+    context.set_context(mode=context.GRAPH_MODE)
+    _get_recovery_context_func_map["enable_recovery"] = enable_recovery
     original_fast_recovery = ds.config.get_fast_recovery()
     ds.config.set_fast_recovery(fast_recovery)
     data = create_np_dataset(20, num_parallel_workers, python_multiprocessing)
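Both overrides made here are process-global: the func-map entry and the dataset fast-recovery flag outlive the test unless they are put back, and the hunk already saves original_fast_recovery for that purpose. A sketch of the full save/override/restore pattern, assuming a try/finally clean-up that is not necessarily how the real test is written:

import mindspore.dataset as ds
from mindspore.parallel._recovery_context import _get_recovery_context_func_map

def run_with_recovery_enabled(body, fast_recovery=True):
    """Temporarily force enable_recovery=True and a given fast_recovery setting."""
    original_getter = _get_recovery_context_func_map.get("enable_recovery")
    original_fast_recovery = ds.config.get_fast_recovery()
    _get_recovery_context_func_map["enable_recovery"] = lambda: True
    ds.config.set_fast_recovery(fast_recovery)
    try:
        return body()
    finally:
        # Undo the global overrides so later tests see the default behaviour.
        if original_getter is not None:
            _get_recovery_context_func_map["enable_recovery"] = original_getter
        ds.config.set_fast_recovery(original_fast_recovery)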
@@ -0,0 +1,29 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import os
+import pytest
+
+
+@pytest.mark.level1
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_single
+def test_pynative_lenet_gpu_2p_mpi():
+    """
+    Feature: PyNative LeNet 2P
+    Description: test PyNative LeNet 2P with mpirun
+    Expectation: success, return_code==0
+    """
+    return_code = os.system("mpirun -n 2 pytest -s test_gpu_lenet.py::test_train_net_fade_then_sink")
+    assert return_code == 0
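One note on the wrapper: on POSIX, os.system returns the raw wait status rather than the child's exit code, so `assert return_code == 0` distinguishes success from failure but not a specific code. A hedged alternative using subprocess, which reports the child's exit code directly (same command as in the new file; shown only as an illustration, not part of the commit):

import subprocess

def run_lenet_2p():
    """Run the 2-process LeNet case under mpirun and return the child's exit code."""
    completed = subprocess.run(
        ["mpirun", "-n", "2", "pytest", "-s",
         "test_gpu_lenet.py::test_train_net_fade_then_sink"],
        check=False,
    )
    return completed.returncode

# e.g. inside the test:  assert run_lenet_2p() == 0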