# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
Test Dataset AutoTune's Save and Load Configuration support
"""
import os
import json
import random
import numpy as np
import pytest
import mindspore.dataset as ds
import mindspore.dataset.transforms as transforms
import mindspore.dataset.vision as vision

MNIST_DATA_DIR = "../data/dataset/testMnistData"
DATA_DIR = "../data/dataset/testPK/data"

def data_pipeline_same(file1, file2):
    """
    Check whether two JSON files describe the same dataset pipeline.

    Each file may either wrap the pipeline under a top-level "tree" key
    (AutoTune final-config format) or be the pipeline object itself
    (serialized format); the wrapper is unwrapped before comparing.

    Args:
        file1 (Path): first JSON file; must exist.
        file2 (Path): second JSON file; must exist.

    Returns:
        bool: True if both files contain an identical pipeline definition.
    """
    for file in (file1, file2):
        assert file.exists()

    def load_pipeline(path):
        # Load the JSON content and unwrap the optional "tree" node.
        with path.open() as handle:
            content = json.load(handle)
        return content["tree"] if "tree" in content else content

    return load_pipeline(file1) == load_pipeline(file2)

@pytest.mark.forked
class TestAutotuneSaveLoad:
    """Tests for Dataset AutoTune's save/load configuration support."""
    # Note: Use pytest fixture tmp_path to create files within this temporary directory,
    # which is automatically created for each test and deleted at the end of the test.

    @staticmethod
    def setup_method():
        # Randomize RANK_ID so each run exercises a different rank suffix in
        # the AutoTune output filename ("<prefix>_<rank_id>.json").
        os.environ['RANK_ID'] = str(random.randint(0, 9))

    @staticmethod
    def teardown_method():
        # Remove the env var so it cannot leak into other test modules.
        del os.environ['RANK_ID']

    @staticmethod
    def test_autotune_generator_pipeline(tmp_path):
        """
        Feature: Autotuning
        Description: Test save final config with GeneratorDataset pipeline: Generator -> Shuffle -> Batch
        Expectation: Pipeline runs successfully
        """
        original_autotune = ds.config.get_enable_autotune()
        ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_generator_atfinal"))

        source = [(np.array([x]),) for x in range(1024)]
        data1 = ds.GeneratorDataset(source, ["data"])
        data1 = data1.shuffle(64)
        data1 = data1.batch(32)

        ds.serialize(data1, str(tmp_path / "test_autotune_generator_serialized.json"))

        itr = data1.create_dict_iterator(num_epochs=5)
        for _ in range(5):
            for _ in itr:
                pass
        # Drop the iterator so the AutoTune config file gets flushed to disk.
        del itr
        ds.config.set_enable_autotune(original_autotune)

        # AutoTune appends "_<rank_id>.json" to the configured file prefix.
        file = tmp_path / ("test_autotune_generator_atfinal_" + os.environ['RANK_ID'] + ".json")
        assert file.exists()

    @staticmethod
    def test_autotune_save_overwrite_generator(tmp_path):
        """
        Feature: Autotuning
        Description: Test set_enable_autotune and existing json_filepath is overwritten
        Expectation: set_enable_autotune() executes successfully with file-exist warning produced.
            Execution of 2nd pipeline overwrites AutoTune configuration file of 1st pipeline.
        """
        source = [(np.array([x]),) for x in range(1024)]

        at_final_json_filename = "test_autotune_save_overwrite_generator_atfinal.json"
        original_autotune = ds.config.get_enable_autotune()
        ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))

        data1 = ds.GeneratorDataset(source, ["data"])

        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass

        ds.config.set_enable_autotune(False)

        # Re-enable AutoTune with the SAME file path so the existing config file
        # is overwritten. (Fixed: previously the directory and filename were
        # concatenated without a path separator, so the 2nd pipeline wrote to a
        # different path and never overwrote the 1st pipeline's file.)
        ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))

        data2 = ds.GeneratorDataset(source, ["data"])
        data2 = data2.shuffle(64)

        for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass

        ds.config.set_enable_autotune(original_autotune)

    @staticmethod
    def test_autotune_mnist_pipeline(tmp_path):
        """
        Feature: Autotuning
        Description: Test save final config with Mnist pipeline: Mnist -> Batch -> Map
        Expectation: Pipeline runs successfully
        """
        original_autotune = ds.config.get_enable_autotune()
        ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_mnist_pipeline_atfinal"))
        original_seed = ds.config.get_seed()
        # Fix the seed so both deserialized pipelines below produce identical data.
        ds.config.set_seed(1)

        data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=100)
        one_hot_encode = transforms.OneHot(10)  # num_classes is input argument
        data1 = data1.map(operations=one_hot_encode, input_columns="label")

        data1 = data1.batch(batch_size=10, drop_remainder=True)

        ds.serialize(data1, str(tmp_path / "test_autotune_mnist_pipeline_serialized.json"))

        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass

        ds.config.set_enable_autotune(original_autotune)

        # Confirm final AutoTune config file pipeline is identical to the serialized file pipeline.
        file1 = tmp_path / ("test_autotune_mnist_pipeline_atfinal_" + os.environ['RANK_ID'] + ".json")
        file2 = tmp_path / "test_autotune_mnist_pipeline_serialized.json"
        assert data_pipeline_same(file1, file2)

        # Both files must also deserialize into pipelines yielding identical data.
        desdata1 = ds.deserialize(json_filepath=str(file1))
        desdata2 = ds.deserialize(json_filepath=str(file2))

        num = 0
        for newdata1, newdata2 in zip(desdata1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                      desdata2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            np.testing.assert_array_equal(newdata1['image'], newdata2['image'])
            np.testing.assert_array_equal(newdata1['label'], newdata2['label'])
            num += 1
        # 100 samples / batch size 10 with drop_remainder => exactly 10 batches.
        assert num == 10

        ds.config.set_seed(original_seed)

    @staticmethod
    def test_autotune_warning_with_offload(tmp_path, capfd):
        """
        Feature: Autotuning
        Description: Test autotune config saving with offload=True
        Expectation: Autotune should not write the config file and print a log message
        """
        original_seed = ds.config.get_seed()
        ds.config.set_seed(1)
        at_final_json_filename = "test_autotune_warning_with_offload_config.json"
        config_path = tmp_path / at_final_json_filename
        original_autotune = ds.config.get_enable_autotune()
        ds.config.set_enable_autotune(True, str(config_path))

        # Dataset with offload activated.
        dataset = ds.ImageFolderDataset(DATA_DIR, num_samples=8)
        dataset = dataset.map(operations=[vision.Decode()], input_columns="image")
        dataset = dataset.map(operations=[vision.HWC2CHW()], input_columns="image", offload=True)
        dataset = dataset.batch(8, drop_remainder=True)

        for _ in dataset.create_tuple_iterator(num_epochs=1, output_numpy=True):
            pass

        _, err = capfd.readouterr()

        assert "Some nodes have been offloaded. AutoTune is unable to write the autotune configuration to disk. " \
               "Disable offload to prevent this from happening." in err

        # The config file must NOT have been written when offload is active.
        with pytest.raises(FileNotFoundError):
            with open(config_path) as _:
                pass

        ds.config.set_enable_autotune(original_autotune)
        ds.config.set_seed(original_seed)

    @staticmethod
    def test_autotune_save_overwrite_mnist(tmp_path):
        """
        Feature: Autotuning
        Description: Test set_enable_autotune and existing json_filepath is overwritten
        Expectation: set_enable_autotune() executes successfully with file-exist warning produced.
            Execution of 2nd pipeline overwrites AutoTune configuration file of 1st pipeline.
        """
        original_seed = ds.config.get_seed()
        ds.config.set_seed(1)
        at_final_json_filename = "test_autotune_save_overwrite_mnist_atfinal"

        # Pipeline#1
        original_autotune = ds.config.get_enable_autotune()
        ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))

        data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=100)
        one_hot_encode = transforms.OneHot(10)  # num_classes is input argument
        data1 = data1.map(operations=one_hot_encode, input_columns="label")
        data1 = data1.batch(batch_size=10, drop_remainder=True)

        ds.serialize(data1, str(tmp_path / "test_autotune_save_overwrite_mnist_serialized1.json"))

        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass

        ds.config.set_enable_autotune(False)

        # Pipeline#2 — reuses the same file path, overwriting Pipeline#1's config.
        ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))

        data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=200)
        data1 = data1.map(operations=one_hot_encode, input_columns="label")
        data1 = data1.shuffle(40)
        data1 = data1.batch(batch_size=20, drop_remainder=False)

        ds.serialize(data1, str(tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"))

        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            pass

        ds.config.set_enable_autotune(False)

        # Confirm 2nd serialized file is identical to final AutoTune config file.
        file1 = tmp_path / ("test_autotune_save_overwrite_mnist_atfinal_" + os.environ['RANK_ID'] + ".json")
        file2 = tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"
        assert data_pipeline_same(file1, file2)

        # Confirm the serialized files for the 2 different pipelines are different
        file1 = tmp_path / "test_autotune_save_overwrite_mnist_serialized1.json"
        file2 = tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"
        assert not data_pipeline_same(file1, file2)

        ds.config.set_seed(original_seed)
        ds.config.set_enable_autotune(original_autotune)