From 46e223e569f94fe965ce2d2764a1de3eeeb2e759 Mon Sep 17 00:00:00 2001
From: Cathy Wong
Date: Fri, 18 Feb 2022 12:38:55 -0500
Subject: [PATCH] [MD] Update set_enable_autotune API to add save filepath

---
 .../python/bindings/dataset/core/bindings.cc  |   5 +-
 .../minddata/dataset/core/config_manager.cc   |  59 +++++-
 .../minddata/dataset/core/config_manager.h    |  19 +-
 .../minddata/dataset/engine/perf/auto_tune.cc |   3 +
 .../minddata/dataset/engine/perf/auto_tune.h  |  14 +-
 .../python/mindspore/dataset/core/config.py   |  40 +++-
 tests/ut/python/dataset/test_autotune.py      |  29 ---
 .../ut/python/dataset/test_autotune_config.py | 112 ++++++++++++
 .../python/dataset/test_autotune_saveload.py  | 172 ++++++++++++++++++
 9 files changed, 411 insertions(+), 42 deletions(-)
 create mode 100644 tests/ut/python/dataset/test_autotune_config.py
 create mode 100644 tests/ut/python/dataset/test_autotune_saveload.py

diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc
index 296627d0f77..3cf77a84ffc 100644
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc
@@ -61,7 +61,10 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
                   .def("get_enable_shared_mem", &ConfigManager::enable_shared_mem)
                   .def("set_auto_offload", &ConfigManager::set_auto_offload)
                   .def("get_auto_offload", &ConfigManager::get_auto_offload)
-                  .def("set_enable_autotune", &ConfigManager::set_enable_autotune)
+                  .def("set_enable_autotune",
+                       [](ConfigManager &c, bool enable, bool save_autoconfig, std::string json_filepath) {
+                         THROW_IF_ERROR(c.set_enable_autotune(enable, save_autoconfig, json_filepath));
+                       })
                   .def("get_enable_autotune", &ConfigManager::enable_autotune)
                   .def("set_autotune_interval", &ConfigManager::set_autotune_interval)
                   .def("get_autotune_interval", &ConfigManager::autotune_interval)
diff --git a/mindspore/ccsrc/minddata/dataset/core/config_manager.cc b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc
index 86b91fdadfd..a982ef92fd9 100644
--- a/mindspore/ccsrc/minddata/dataset/core/config_manager.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
  */
 #include "minddata/dataset/core/config_manager.h"
+#include <unistd.h>
 #include <fstream>
 #include <iostream>
 #include <limits>
@@ -27,6 +28,7 @@
 #else
 #include "mindspore/lite/src/common/log_adapter.h"
 #endif
+#include "minddata/dataset/util/status.h"
 #include "minddata/dataset/util/system_pool.h"
 #include "utils/ms_utils.h"

@@ -53,7 +55,9 @@ ConfigManager::ConfigManager()
       enable_shared_mem_(true),
       auto_offload_(false),
       enable_autotune_(false),
+      save_autoconfig_(false),
       autotune_interval_(kCfgAutoTuneInterval) {
+  autotune_json_filepath_ = kEmptyString;
   num_cpu_threads_ = num_cpu_threads_ > 0 ? num_cpu_threads_ : std::numeric_limits<int32_t>::max();
   num_parallel_workers_ =
       num_parallel_workers_ < num_cpu_threads_ ? num_parallel_workers_ : num_cpu_threads_;
   std::string env_cache_host = common::GetEnv("MS_CACHE_HOST");
@@ -126,7 +130,7 @@ Status ConfigManager::set_num_parallel_workers(int32_t num_parallel_workers) {
   if (num_parallel_workers > num_cpu_threads_ || num_parallel_workers < 1) {
     std::string err_msg = "Invalid Parameter, num_parallel_workers exceeds the boundary between 1 and " +
                           std::to_string(num_cpu_threads_) + ", as got " + std::to_string(num_parallel_workers) + ".";
-    RETURN_STATUS_UNEXPECTED(err_msg);
+    LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
   }
   num_parallel_workers_ = num_parallel_workers;
   return Status::OK();
@@ -162,5 +166,56 @@ void ConfigManager::set_num_connections(int32_t num_connections) { num_connectio

 void ConfigManager::set_cache_prefetch_size(int32_t cache_prefetch_size) { cache_prefetch_size_ = cache_prefetch_size; }

+Status ConfigManager::set_enable_autotune(bool enable, bool save_autoconfig, const std::string &json_filepath) {
+  enable_autotune_ = enable;
+  save_autoconfig_ = save_autoconfig;
+
+  // Check if saving the AutoTune config was not requested
+  if (!save_autoconfig_) {
+    // No further processing needed, such as validating the json_filepath input
+    return Status::OK();
+  }
+
+  Path jsonpath(json_filepath);
+
+  if (jsonpath.IsDirectory()) {
+    std::string err_msg = "Invalid json_filepath parameter. <" + json_filepath + "> is a directory, not a filename.";
+    LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
+  }
+
+  std::string parent_path = jsonpath.ParentPath();
+  if (!parent_path.empty()) {
+    if (!Path(parent_path).Exists()) {
+      std::string err_msg = "Invalid json_filepath parameter. Directory <" + parent_path + "> does not exist.";
+      LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
+    }
+  } else {
+    // Set parent_path to the current working directory
+    parent_path = ".";
+  }
+
+  std::string real_path;
+  if (Path::RealPath(parent_path, real_path).IsError()) {
+    std::string err_msg = "Invalid json_filepath parameter. Cannot resolve the real path of <" + parent_path + ">.";
+    LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
+  }
+
+  if (access(real_path.c_str(), W_OK) == -1) {
+    std::string err_msg = "Invalid json_filepath parameter. No access to write to <" + real_path + ">.";
+    LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
+  }
+
+  if (jsonpath.Exists()) {
+    // Note: Allow the file to be overwritten (like serialize)
+    std::string warn_msg = "File <" + json_filepath + "> already exists." +
+                           " It will be overwritten with the AutoTuned data pipeline configuration.";
+    MS_LOG(WARNING) << warn_msg;
+  }
+
+  // Save the final AutoTune configuration JSON filepath name
+  autotune_json_filepath_ = json_filepath;
+  return Status::OK();
+}
+
 }  // namespace dataset
 }  // namespace mindspore
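Note on the validation order above: the checks behave like the following rough Python model (illustrative only — the helper name is hypothetical, POSIX access()/W_OK semantics are assumed, and the authoritative logic is the C++ set_enable_autotune above):

    import os

    def validate_autotune_filepath(json_filepath):
        # Mirrors Path::IsDirectory()
        if os.path.isdir(json_filepath):
            raise RuntimeError("<{}> is a directory, not a filename.".format(json_filepath))
        # Mirrors Path::ParentPath(); an empty parent means the current working directory
        parent_path = os.path.dirname(json_filepath) or "."
        if not os.path.exists(parent_path):
            raise RuntimeError("Directory <{}> does not exist.".format(parent_path))
        # Mirrors Path::RealPath() followed by access(real_path, W_OK)
        real_path = os.path.realpath(parent_path)
        if not os.access(real_path, os.W_OK):
            raise RuntimeError("No access to write to <{}>.".format(real_path))
        # An existing file is allowed; it is overwritten with a warning
        if os.path.exists(json_filepath):
            print("Warning: <{}> already exists and will be overwritten.".format(json_filepath))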
diff --git a/mindspore/ccsrc/minddata/dataset/core/config_manager.h b/mindspore/ccsrc/minddata/dataset/core/config_manager.h
index b8cee901db6..234459aae1a 100644
--- a/mindspore/ccsrc/minddata/dataset/core/config_manager.h
+++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ * Copyright 2019-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,6 +36,8 @@
 namespace mindspore {
 namespace dataset {
+const char kEmptyString[] = "";
+const char kJsonExtension[] = ".json";
 // The ConfigManager is a class for managing default values. When a user is constructing any objects
 // in the framework, often they may choose to omit some settings instead of overriding them.
 // This class manages some of the default values, for cases when the user does not manually specify
@@ -232,12 +234,23 @@ class ConfigManager {
   // setter function
   // @param enable - To enable autotune
-  void set_enable_autotune(bool enable) { enable_autotune_ = enable; }
+  // @param save_autoconfig - True if the AutoTune data pipeline configuration should be saved
+  // @param json_filepath - JSON filepath where the final AutoTune data pipeline configuration will be generated
+  // @return Status error code
+  Status set_enable_autotune(bool enable, bool save_autoconfig, const std::string &json_filepath);

   // getter function
   // @return - Flag to indicate whether autotune is enabled
   bool enable_autotune() const { return enable_autotune_; }

+  // getter function
+  // @return - Flag to indicate whether to save the AutoTune configuration
+  bool save_autoconfig() const { return save_autoconfig_; }
+
+  // getter function
+  // @return - The final AutoTune configuration JSON filepath
+  std::string get_autotune_json_filepath() const { return autotune_json_filepath_; }
+
   // getter function
   // @return - autotune interval in steps
   int64_t autotune_interval() const { return autotune_interval_; }
@@ -270,6 +283,8 @@ class ConfigManager {
   bool enable_shared_mem_;
   bool auto_offload_;
   bool enable_autotune_;
+  bool save_autoconfig_;                // True if the AutoTune configuration should be saved
+  std::string autotune_json_filepath_;  // Filepath of the final AutoTune configuration JSON file
   int64_t autotune_interval_;
   // Private helper function that takes a nlohmann json format and populates the settings
   // @param j - The json nlohmann json info
diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/auto_tune.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/auto_tune.cc
index 1d5ca1158b7..0704f1ab740 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/perf/auto_tune.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/perf/auto_tune.cc
@@ -19,6 +19,7 @@
 #include <algorithm>
 #include <functional>
 #include <memory>
+#include <string>
 #include <vector>
 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.h"
 #endif
@@ -39,6 +40,8 @@ AutoTune::AutoTune(TreeAdapter *tree_adap, ProfilingManager *profiling_mgr)
   tree_modifier_ = std::make_unique<TreeModifier>(tree_adapter_);
   max_workers_ = GlobalContext::config_manager()->num_cpu_threads();
   step_gap_ = GlobalContext::config_manager()->autotune_interval();
+  save_autoconfig_ = GlobalContext::config_manager()->save_autoconfig();
+  autotune_json_filepath_ = GlobalContext::config_manager()->get_autotune_json_filepath();
 }

 Status AutoTune::Main() {
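Because the AutoTune constructor above snapshots save_autoconfig() and get_autotune_json_filepath() from the global ConfigManager, set_enable_autotune() only affects pipelines launched after the call. A minimal ordering sketch (the output path is hypothetical):

    import numpy as np
    import mindspore.dataset as ds

    # Configure AutoTune first; the AutoTune instance reads the global config
    # when the pipeline's execution tree is launched.
    ds.config.set_enable_autotune(True, "/tmp/at_out.json")  # hypothetical path

    source = [(np.array([x]),) for x in range(64)]
    data = ds.GeneratorDataset(source, ["data"])

    for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        pass

    ds.config.set_enable_autotune(False)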
diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/auto_tune.h b/mindspore/ccsrc/minddata/dataset/engine/perf/auto_tune.h
index f4add57b142..3246189863b 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/perf/auto_tune.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/perf/auto_tune.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,13 @@
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_AUTO_TUNE_H_
-#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_AUTO_TUNE_H_
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_AUTO_TUNE_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_AUTO_TUNE_H_

 #include <map>
 #include <memory>
 #include <mutex>
+#include <string>
 #include <vector>
 #include "minddata/dataset/util/status.h"
 #include "minddata/dataset/util/log_adapter.h"
@@ -191,7 +192,12 @@ class AutoTune {
   int64_t step_gap_;
   int32_t last_step_profiled_;
   bool skip_bool_;
+  /// True if the AutoTune configuration should be saved
+  bool save_autoconfig_;
+
+  /// Filepath of the final AutoTune configuration JSON file
+  std::string autotune_json_filepath_;
 };
 }  // namespace dataset
 }  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_AUTO_TUNE_H_
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_AUTO_TUNE_H_
diff --git a/mindspore/python/mindspore/dataset/core/config.py b/mindspore/python/mindspore/dataset/core/config.py
index 1286751b13a..ee8a7ec58b8 100644
--- a/mindspore/python/mindspore/dataset/core/config.py
+++ b/mindspore/python/mindspore/dataset/core/config.py
@@ -28,6 +28,7 @@ import random
 import numpy
 import mindspore._c_dataengine as cde
 from mindspore import log as logger
+from .validator_helpers import replace_none

 __all__ = ['set_seed', 'get_seed', 'set_prefetch_size', 'get_prefetch_size', 'set_num_parallel_workers',
            'get_num_parallel_workers', 'set_numa_enable', 'get_numa_enable', 'set_monitor_sampling_interval',
@@ -421,24 +422,55 @@ def load(file):
     _config.load(file)


-def set_enable_autotune(enable):
+def set_enable_autotune(enable, json_filepath=None):
     """
-    Set the default state of AutoTune flag. If it is True, will facilitate users to improve
-    performance for a given workload by automatically finding the better settings for data pipeline.
+    Set the default state of the AutoTune flag. If it is True, AutoTune will help users improve
+    performance for a given workload by automatically finding better settings for the data pipeline.
+    Optionally save the AutoTuned data pipeline configuration to a JSON file, which
+    can be loaded with deserialize().

     Args:
         enable (bool): Whether to use AutoTune feature when running data pipeline.
+        json_filepath (str, optional): The filepath where the AutoTuned data pipeline
+            configuration will be generated as a JSON file. If the file already exists,
+            it will be overwritten. If no AutoTuned data pipeline configuration is desired,
+            then set json_filepath to None (default=None).

     Raises:
         TypeError: If enable is not a boolean data type.
+        TypeError: If json_filepath is not a str value.
+        RuntimeError: If the value of json_filepath is the empty string.
+        RuntimeError: If json_filepath is a directory.
+        RuntimeError: If the parent path of json_filepath does not exist.
+        RuntimeError: If the parent path of json_filepath does not have write permission.
+
+    Note:
+        When enable is False, the value of json_filepath is ignored.
     Examples:
+        >>> # Enable AutoTune and save the AutoTuned data pipeline configuration
+        >>> ds.config.set_enable_autotune(True, "/path/to/autotune_out.json")
+        >>>
         >>> # Enable AutoTune
         >>> ds.config.set_enable_autotune(True)
     """
     if not isinstance(enable, bool):
         raise TypeError("enable must be of type bool.")
-    _config.set_enable_autotune(enable)
+
+    if json_filepath is not None and not isinstance(json_filepath, str):
+        raise TypeError("json_filepath must be a str value but was: {}.".format(json_filepath))
+
+    if enable and json_filepath == "":
+        raise RuntimeError("The value of json_filepath cannot be the empty string.")
+
+    if not enable and json_filepath is not None:
+        logger.warning("The value of json_filepath is ignored when enable is False.")
+
+    save_autoconfig = bool(enable and json_filepath is not None)
+    json_filepath = replace_none(json_filepath, "")
+    _config.set_enable_autotune(enable, save_autoconfig, json_filepath)


 def get_enable_autotune():
diff --git a/tests/ut/python/dataset/test_autotune.py b/tests/ut/python/dataset/test_autotune.py
index 1136795b192..75a61708b35 100644
--- a/tests/ut/python/dataset/test_autotune.py
+++ b/tests/ut/python/dataset/test_autotune.py
@@ -203,32 +203,3 @@ class TestAutotuneWithProfiler:
             pass

         ds.config.set_enable_autotune(False)
-
-    def test_autotune_config(self):
-        """
-        Feature: Autotuning
-        Description: test basic config of autotune
-        Expectation: config can be set successfully
-        """
-        autotune_state = ds.config.get_enable_autotune()
-        assert autotune_state is False
-
-        ds.config.set_enable_autotune(False)
-        autotune_state = ds.config.get_enable_autotune()
-        assert autotune_state is False
-
-        with pytest.raises(TypeError):
-            ds.config.set_enable_autotune(1)
-
-        autotune_interval = ds.config.get_autotune_interval()
-        assert autotune_interval == 0
-
-        ds.config.set_autotune_interval(200)
-        autotune_interval = ds.config.get_autotune_interval()
-        assert autotune_interval == 200
-
-        with pytest.raises(TypeError):
-            ds.config.set_autotune_interval(20.012)
-
-        with pytest.raises(ValueError):
-            ds.config.set_autotune_interval(-999)
diff --git a/tests/ut/python/dataset/test_autotune_config.py b/tests/ut/python/dataset/test_autotune_config.py
new file mode 100644
index 00000000000..b9df6868fc6
--- /dev/null
+++ b/tests/ut/python/dataset/test_autotune_config.py
@@ -0,0 +1,112 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+Test Dataset AutoTune Configuration Support
+"""
+import pytest
+import mindspore.dataset as ds
+
+
+@pytest.mark.forked
+class TestAutotuneConfig:
+    @staticmethod
+    def test_autotune_config_basic():
+        """
+        Feature: Autotuning
+        Description: Test basic config of AutoTune
+        Expectation: Config can be set successfully
+        """
+        autotune_state = ds.config.get_enable_autotune()
+        assert autotune_state is False
+
+        ds.config.set_enable_autotune(False)
+        autotune_state = ds.config.get_enable_autotune()
+        assert autotune_state is False
+
+        with pytest.raises(TypeError):
+            ds.config.set_enable_autotune(1)
+
+        autotune_interval = ds.config.get_autotune_interval()
+        assert autotune_interval == 0
+
+        ds.config.set_autotune_interval(200)
+        autotune_interval = ds.config.get_autotune_interval()
+        assert autotune_interval == 200
+
+        with pytest.raises(TypeError):
+            ds.config.set_autotune_interval(20.012)
+
+        with pytest.raises(ValueError):
+            ds.config.set_autotune_interval(-999)
+
+    @staticmethod
+    def test_autotune_config_filepath_invalid():
+        """
+        Feature: Autotuning
+        Description: Test set_enable_autotune() with invalid json_filepath
+        Expectation: Invalid input is detected
+        """
+        with pytest.raises(TypeError):
+            ds.config.set_enable_autotune(True, 123)
+
+        with pytest.raises(TypeError):
+            ds.config.set_enable_autotune(True, 0)
+
+        with pytest.raises(TypeError):
+            ds.config.set_enable_autotune(True, True)
+
+        with pytest.raises(TypeError):
+            ds.config.set_enable_autotune(False, 1.1)
+
+        with pytest.raises(RuntimeError) as error_info:
+            ds.config.set_enable_autotune(True, "")
+        assert "cannot be the empty string" in str(error_info.value)
+
+        with pytest.raises(RuntimeError) as error_info:
+            ds.config.set_enable_autotune(True, "/tmp")
+        assert "is a directory" in str(error_info.value)
+
+        with pytest.raises(RuntimeError) as error_info:
+            ds.config.set_enable_autotune(True, ".")
+        assert "is a directory" in str(error_info.value)
+
+        with pytest.raises(RuntimeError) as error_info:
+            ds.config.set_enable_autotune(True, "/JUNKPATH/at_out.json")
+        assert "Directory" in str(error_info.value)
+        assert "does not exist" in str(error_info.value)
+
+    @staticmethod
+    def test_autotune_config_filepath_success():
+        """
+        Feature: Autotuning
+        Description: Test set_enable_autotune() with valid filepath input
+        Expectation: set_enable_autotune() executes successfully
+        """
+        # Note: Sequential calls to set_enable_autotune() are supported
+        ds.config.set_enable_autotune(True, "file1.json")
+        ds.config.set_enable_autotune(True, "file1.json")
+        ds.config.set_enable_autotune(True, "file2.json")
+
+        # Note: The preferred '.json' extension is not mandatory for json_filepath
+        ds.config.set_enable_autotune(True, "at_out.JSON")
+        ds.config.set_enable_autotune(True, "/tmp/at_out.txt")
+        ds.config.set_enable_autotune(True, "at_out")
+
+        # Note: When enable is False, the json_filepath parameter is ignored
+        ds.config.set_enable_autotune(False, "/NONEXISTDIR/junk.json")
+        ds.config.set_enable_autotune(False, "")
+        ds.config.set_enable_autotune(False, None)
diff --git a/tests/ut/python/dataset/test_autotune_saveload.py b/tests/ut/python/dataset/test_autotune_saveload.py
new file mode 100644
index 00000000000..aaf087e91cb
--- /dev/null
+++ b/tests/ut/python/dataset/test_autotune_saveload.py
@@ -0,0 +1,172 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+Test Dataset AutoTune's Save and Load Configuration support
+"""
+import filecmp
+import numpy as np
+import pytest
+import mindspore.dataset as ds
+import mindspore.dataset.transforms.c_transforms as c_transforms
+
+MNIST_DATA_DIR = "../data/dataset/testMnistData"
+
+
+@pytest.mark.forked
+class TestAutotuneSaveLoad:
+    # Note: Use the pytest fixture tmp_path to create files within this temporary directory,
+    # which is automatically created for each test and cleaned up by pytest.
+
+    @staticmethod
+    def test_autotune_generator_pipeline(tmp_path):
+        """
+        Feature: Autotuning
+        Description: Test saving the final config with a GeneratorDataset pipeline: Generator -> Shuffle -> Batch
+        Expectation: Pipeline runs successfully
+        """
+        ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_generator_atfinal.json"))
+
+        source = [(np.array([x]),) for x in range(1024)]
+        data1 = ds.GeneratorDataset(source, ["data"])
+        data1 = data1.shuffle(64)
+        data1 = data1.batch(32)
+
+        ds.serialize(data1, str(tmp_path / "test_autotune_generator_serialized.json"))
+
+        itr = data1.create_dict_iterator(num_epochs=5)
+        for _ in range(5):
+            for _ in itr:
+                pass
+
+        ds.config.set_enable_autotune(False)
+
+    @staticmethod
+    def skip_test_autotune_mnist_pipeline(tmp_path):
+        """
+        Feature: Autotuning
+        Description: Test saving the final config with a Mnist pipeline: Mnist -> Batch -> Map
+        Expectation: Pipeline runs successfully
+        """
+        ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json"))
+
+        ds.config.set_seed(1)
+
+        data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=100)
+        one_hot_encode = c_transforms.OneHot(10)  # num_classes is an input argument
+        data1 = data1.map(operations=one_hot_encode, input_columns="label")
+
+        data1 = data1.batch(batch_size=10, drop_remainder=True)
+
+        ds.serialize(data1, str(tmp_path / "test_autotune_mnist_pipeline_serialized.json"))
+
+        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
+            pass
+
+        ds.config.set_enable_autotune(False)
+
+        # Confirm the final AutoTune config file is identical to the serialized file.
+        assert filecmp.cmp(str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json"),
+                           str(tmp_path / "test_autotune_mnist_pipeline_serialized.json"))
+
+        desdata1 = ds.deserialize(json_filepath=str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json"))
+        desdata2 = ds.deserialize(json_filepath=str(tmp_path / "test_autotune_mnist_pipeline_serialized.json"))
+
+        num = 0
+        for newdata1, newdata2 in zip(desdata1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                                      desdata2.create_dict_iterator(num_epochs=1, output_numpy=True)):
+            np.testing.assert_array_equal(newdata1['image'], newdata2['image'])
+            np.testing.assert_array_equal(newdata1['label'], newdata2['label'])
+            num += 1
+        assert num == 10
+
+    @staticmethod
+    def test_autotune_save_overwrite_generator(tmp_path):
+        """
+        Feature: Autotuning
+        Description: Test set_enable_autotune() when the existing json_filepath is overwritten
+        Expectation: set_enable_autotune() executes successfully with a file-exists warning produced.
+            Execution of the 2nd pipeline overwrites the AutoTune configuration file of the 1st pipeline.
+        """
+        source = [(np.array([x]),) for x in range(1024)]
+
+        at_final_json_filename = "test_autotune_save_overwrite_generator_atfinal.json"
+
+        ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))
+
+        data1 = ds.GeneratorDataset(source, ["data"])
+
+        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
+            pass
+
+        ds.config.set_enable_autotune(False)
+
+        ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))
+
+        data2 = ds.GeneratorDataset(source, ["data"])
+        data2 = data2.shuffle(64)
+
+        for _ in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
+            pass
+
+        ds.config.set_enable_autotune(False)
+
+    @staticmethod
+    def skip_test_autotune_save_overwrite_mnist(tmp_path):
+        """
+        Feature: Autotuning
+        Description: Test set_enable_autotune() when the existing json_filepath is overwritten
+        Expectation: set_enable_autotune() executes successfully with a file-exists warning produced.
+            Execution of the 2nd pipeline overwrites the AutoTune configuration file of the 1st pipeline.
+        """
+        ds.config.set_seed(1)
+        at_final_json_filename = "test_autotune_save_overwrite_mnist_atfinal.json"
+
+        # Pipeline#1
+        ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))
+
+        data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=100)
+        one_hot_encode = c_transforms.OneHot(10)  # num_classes is an input argument
+        data1 = data1.map(operations=one_hot_encode, input_columns="label")
+        data1 = data1.batch(batch_size=10, drop_remainder=True)
+
+        ds.serialize(data1, str(tmp_path / "test_autotune_save_overwrite_mnist_serialized1.json"))
+
+        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
+            pass
+
+        ds.config.set_enable_autotune(False)
+
+        # Pipeline#2
+        ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))
+
+        data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=200)
+        data1 = data1.map(operations=one_hot_encode, input_columns="label")
+        data1 = data1.shuffle(40)
+        data1 = data1.batch(batch_size=20, drop_remainder=False)
+
+        ds.serialize(data1, str(tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"))
+
+        for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
+            pass
+
+        ds.config.set_enable_autotune(False)
+
+        # Confirm the 2nd serialized file is identical to the final AutoTune config file.
+        assert filecmp.cmp(str(tmp_path / "test_autotune_save_overwrite_mnist_atfinal.json"),
+                           str(tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"))
+
+        # Confirm the serialized files for the 2 different pipelines are different
+        assert not filecmp.cmp(str(tmp_path / "test_autotune_save_overwrite_mnist_serialized1.json"),
+                               str(tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"))
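Taken together with ds.deserialize(), the new json_filepath argument enables a save/load round trip: tune once, then rebuild the tuned pipeline directly from the saved JSON. A minimal usage sketch (the output path is hypothetical, not part of this patch):

    import numpy as np
    import mindspore.dataset as ds

    at_config = "/tmp/autotune_out.json"  # hypothetical output path

    # Run the pipeline once with AutoTune enabled, saving the tuned configuration
    ds.config.set_enable_autotune(True, at_config)
    source = [(np.array([x]),) for x in range(256)]
    data = ds.GeneratorDataset(source, ["data"]).shuffle(64).batch(32)
    for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        pass
    ds.config.set_enable_autotune(False)

    # Later: rebuild the tuned pipeline directly from the saved JSON
    tuned = ds.deserialize(json_filepath=at_config)
    for _ in tuned.create_dict_iterator(num_epochs=1, output_numpy=True):
        pass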