forked from mindspore-Ecosystem/mindspore
Modify the AutoTune (AT) config file
This commit is contained in:
parent
22240df6c0
commit
accc32da6c
|
@ -22,6 +22,8 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#ifndef ENABLE_ANDROID
|
||||
#include "minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.h"
|
||||
#include "minddata/dataset/engine/serdes.h"
|
||||
|
@ -118,7 +120,13 @@ Status AutoTune::SaveAutotuneConfig(const std::string &file_name) {
|
|||
// The Execution Tree is built by visiting the optimized IR Tree in DFS order.
|
||||
// So we visit the optimized IR tree in DFS order and try to match each IR node with its corresponding dataset op.
|
||||
RETURN_IF_NOT_OK(Serdes::UpdateOptimizedIRTreeJSON(&autotune_config_json_, ops_));
|
||||
RETURN_IF_NOT_OK(Serdes::SaveJSONToFile(autotune_config_json_, file_name));
|
||||
std::vector<std::string> summary;
|
||||
RETURN_IF_NOT_OK(SummarizeTreeConfiguration(&summary));
|
||||
nlohmann::json out_json;
|
||||
out_json["summary"] = summary;
|
||||
out_json["pipeline"] = autotune_config_json_;
|
||||
out_json["remark"] = "The following file has been auto-generated by the Dataset Autotune.";
|
||||
RETURN_IF_NOT_OK(Serdes::SaveJSONToFile(out_json, file_name, true));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -139,6 +147,23 @@ Status AutoTune::SetAutotuneConfigJson() {
|
|||
}
|
||||
#endif
|
||||
|
||||
/// \brief Build one human-readable summary line per reported op in ops_.
/// \details Ops for which inlined() is true, and the op whose Name() is "DeviceQueueOp", are skipped.
///     Every other op contributes one string made of its NameWithID() followed by its NumWorkers()
///     and ConnectorCapacity() values.
/// \param[out] out Vector the summary lines are appended to. It is dereferenced without a null check,
///     so callers must pass a valid pointer.
/// \return Status object (this function always returns Status::OK())
Status AutoTune::SummarizeTreeConfiguration(std::vector<std::string> *out) {
  constexpr int op_name_width = 20;  // field width of the left-aligned op name column
  constexpr int val_width = 2;       // minimum field width of each right-aligned numeric value
  // Convert the size once so the loop condition compares int with int instead of mixing
  // a signed index with an unsigned size.
  const int num_ops = static_cast<int>(ops_.size());
  for (int i = 0; i < num_ops; ++i) {
    // Bind by reference: the element is only read, so there is no reason to copy it.
    const auto &op = ops_[i];
    if (!op->inlined() && op->Name() != "DeviceQueueOp") {
      std::stringstream s;
      s << std::left << std::setw(op_name_width) << op->NameWithID() << "(num_parallel_workers:" << std::right
        << std::setw(val_width) << op->NumWorkers() << ", connector_queue_size:" << std::setw(val_width)
        << op->ConnectorCapacity() << ")";
      (void)out->emplace_back(s.str());
    }
  }
  return Status::OK();
}
|
||||
|
||||
void AutoTune::PrintTreeConfiguration() const {
|
||||
ExecutionTree const *tree = tree_adapter_->tree_.get();
|
||||
for (auto itr = tree->begin(); itr != tree->end(); itr++) {
|
||||
|
|
|
@ -53,6 +53,11 @@ class AutoTune {
|
|||
/// \brief Helper to print the tree configuration
|
||||
void PrintTreeConfiguration() const;
|
||||
|
||||
/// \brief Helper to summarize the execution tree
|
||||
/// \param[out] out An output vector of string to store the summary
|
||||
/// \return Status object
|
||||
Status SummarizeTreeConfiguration(std::vector<std::string> *out);
|
||||
|
||||
#ifndef ENABLE_ANDROID
|
||||
/// \brief Serialize the dataset and save the AT config (workers and queue size) to a json file
|
||||
/// \param file_name Name of the file
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
#include <fstream>
|
||||
#include <stack>
|
||||
#include <iomanip>
|
||||
#include "minddata/dataset/engine/serdes.h"
|
||||
|
||||
#include "minddata/dataset/core/pybind_support.h"
|
||||
|
@ -62,7 +63,8 @@ Status Serdes::SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status Serdes::SaveJSONToFile(const nlohmann::json &json_string, const std::string &file_name) {
|
||||
Status Serdes::SaveJSONToFile(const nlohmann::json &json_string, const std::string &file_name, bool pretty) {
|
||||
constexpr int field_width = 4;
|
||||
try {
|
||||
std::optional<std::string> dir = "";
|
||||
std::optional<std::string> local_file_name = "";
|
||||
|
@ -80,7 +82,10 @@ Status Serdes::SaveJSONToFile(const nlohmann::json &json_string, const std::stri
|
|||
FileUtils::ConcatDirAndFileName(&realpath, &local_file_name, &whole_path);
|
||||
|
||||
std::ofstream file(whole_path.value());
|
||||
file << json_string;
|
||||
if (pretty) {
|
||||
file << std::setw(field_width);
|
||||
}
|
||||
file << json_string << std::endl;
|
||||
file.close();
|
||||
|
||||
ChangeFileMode(whole_path.value(), S_IRUSR | S_IWUSR);
|
||||
|
@ -104,6 +109,10 @@ Status Serdes::Deserialize(const std::string &json_filepath, std::shared_ptr<Dat
|
|||
"Invalid file, failed to parse json file: " + json_filepath + ", error message: " + e.what());
|
||||
}
|
||||
json_in.close();
|
||||
// Handle config generated by dataset autotune
|
||||
if (json_obj.find("pipeline") != json_obj.end()) {
|
||||
json_obj = json_obj["pipeline"];
|
||||
}
|
||||
RETURN_IF_NOT_OK(ConstructPipeline(json_obj, ds));
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -195,8 +195,9 @@ class Serdes {
|
|||
/// \brief Helper function to save JSON to a file
|
||||
/// \param[in] json_string The JSON string to be saved to the file
|
||||
/// \param[in] file_name The file name
|
||||
/// \param[in] pretty Flag to control pretty printing of JSON string to the file
|
||||
/// \return Status The status code returned
|
||||
static Status SaveJSONToFile(const nlohmann::json &json_string, const std::string &file_name);
|
||||
static Status SaveJSONToFile(const nlohmann::json &json_string, const std::string &file_name, bool pretty = false);
|
||||
|
||||
protected:
|
||||
/// \brief Function to determine type of the node - dataset node if no dataset exists or operation node
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
"""
|
||||
Test Dataset AutoTune's Save and Load Configuration support
|
||||
"""
|
||||
import filecmp
|
||||
import json
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
@ -27,6 +27,17 @@ MNIST_DATA_DIR = "../data/dataset/testMnistData"
|
|||
DATA_DIR = "../data/dataset/testPK/data"
|
||||
|
||||
|
||||
def _load_pipeline(json_file):
    """Load a JSON file and return the pipeline description it holds.

    If the top-level JSON value is an object with a "pipeline" key, the value
    stored under that key is returned; otherwise the whole parsed JSON value is
    returned unchanged.

    Args:
        json_file: Path-like object providing ``open()`` (e.g. ``pathlib.Path``).

    Returns:
        The parsed pipeline description.
    """
    with json_file.open() as f:
        content = json.load(f)
    # Only unwrap JSON objects: a membership test on a list or string has different
    # semantics, and indexing those with a string key would raise TypeError.
    if isinstance(content, dict) and "pipeline" in content:
        return content["pipeline"]
    return content


def data_pipeline_same(file1, file2):
    """Check whether two JSON files describe the same data pipeline.

    Each file may hold the pipeline directly or nested under a top-level
    "pipeline" key; both layouts are compared on the pipeline part only.

    Args:
        file1: Path-like object (providing ``exists()`` and ``open()``) of the first JSON file.
        file2: Path-like object (providing ``exists()`` and ``open()``) of the second JSON file.

    Returns:
        bool, True if both pipeline descriptions compare equal.

    Raises:
        AssertionError: If either file does not exist.
    """
    assert file1.exists(), "File does not exist: {}".format(file1)
    assert file2.exists(), "File does not exist: {}".format(file2)
    return _load_pipeline(file1) == _load_pipeline(file2)
|
||||
|
||||
|
||||
@pytest.mark.forked
|
||||
class TestAutotuneSaveLoad:
|
||||
# Note: Use pytest fixture tmp_path to create files within this temporary directory,
|
||||
|
@ -40,14 +51,14 @@ class TestAutotuneSaveLoad:
|
|||
Expectation: pipeline runs successfully
|
||||
"""
|
||||
original_autotune = ds.config.get_enable_autotune()
|
||||
ds.config.set_enable_autotune(True, str(tmp_path) + "test_autotune_generator_atfinal.json")
|
||||
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_generator_atfinal.json"))
|
||||
|
||||
source = [(np.array([x]),) for x in range(1024)]
|
||||
data1 = ds.GeneratorDataset(source, ["data"])
|
||||
data1 = data1.shuffle(64)
|
||||
data1 = data1.batch(32)
|
||||
|
||||
ds.serialize(data1, str(tmp_path) + "test_autotune_generator_serialized.json")
|
||||
ds.serialize(data1, str(tmp_path / "test_autotune_generator_serialized.json"))
|
||||
|
||||
itr = data1.create_dict_iterator(num_epochs=5)
|
||||
for _ in range(5):
|
||||
|
@ -64,7 +75,7 @@ class TestAutotuneSaveLoad:
|
|||
Expectation: pipeline runs successfully
|
||||
"""
|
||||
original_autotune = ds.config.get_enable_autotune()
|
||||
ds.config.set_enable_autotune(True, str(tmp_path) + "test_autotune_mnist_pipeline_atfinal.json")
|
||||
ds.config.set_enable_autotune(True, str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json"))
|
||||
original_seed = ds.config.get_seed()
|
||||
ds.config.set_seed(1)
|
||||
|
||||
|
@ -74,19 +85,20 @@ class TestAutotuneSaveLoad:
|
|||
|
||||
data1 = data1.batch(batch_size=10, drop_remainder=True)
|
||||
|
||||
ds.serialize(data1, str(tmp_path) + "test_autotune_mnist_pipeline_serialized.json")
|
||||
ds.serialize(data1, str(tmp_path / "test_autotune_mnist_pipeline_serialized.json"))
|
||||
|
||||
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||
pass
|
||||
|
||||
ds.config.set_enable_autotune(original_autotune)
|
||||
|
||||
# Confirm final AutoTune config file is identical to the serialized file.
|
||||
assert filecmp.cmp(str(tmp_path) + "test_autotune_mnist_pipeline_atfinal.json",
|
||||
str(tmp_path) + "test_autotune_mnist_pipeline_serialized.json")
|
||||
# Confirm final AutoTune config file pipeline is identical to the serialized file pipeline.
|
||||
file1 = tmp_path / "test_autotune_mnist_pipeline_atfinal.json"
|
||||
file2 = tmp_path / "test_autotune_mnist_pipeline_serialized.json"
|
||||
assert data_pipeline_same(file1, file2)
|
||||
|
||||
desdata1 = ds.deserialize(json_filepath=str(tmp_path) + "test_autotune_mnist_pipeline_atfinal.json")
|
||||
desdata2 = ds.deserialize(json_filepath=str(tmp_path) + "test_autotune_mnist_pipeline_serialized.json")
|
||||
desdata1 = ds.deserialize(json_filepath=str(tmp_path / "test_autotune_mnist_pipeline_atfinal.json"))
|
||||
desdata2 = ds.deserialize(json_filepath=str(tmp_path / "test_autotune_mnist_pipeline_serialized.json"))
|
||||
|
||||
num = 0
|
||||
for newdata1, newdata2 in zip(desdata1.create_dict_iterator(num_epochs=1, output_numpy=True),
|
||||
|
@ -110,7 +122,7 @@ class TestAutotuneSaveLoad:
|
|||
|
||||
at_final_json_filename = "test_autotune_save_overwrite_generator_atfinal.json"
|
||||
original_autotune = ds.config.get_enable_autotune()
|
||||
ds.config.set_enable_autotune(True, str(tmp_path) + at_final_json_filename)
|
||||
ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))
|
||||
|
||||
data1 = ds.GeneratorDataset(source, ["data"])
|
||||
|
||||
|
@ -143,14 +155,14 @@ class TestAutotuneSaveLoad:
|
|||
|
||||
# Pipeline#1
|
||||
original_autotune = ds.config.get_enable_autotune()
|
||||
ds.config.set_enable_autotune(True, str(tmp_path) + at_final_json_filename)
|
||||
ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))
|
||||
|
||||
data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=100)
|
||||
one_hot_encode = c_transforms.OneHot(10) # num_classes is input argument
|
||||
data1 = data1.map(operations=one_hot_encode, input_columns="label")
|
||||
data1 = data1.batch(batch_size=10, drop_remainder=True)
|
||||
|
||||
ds.serialize(data1, str(tmp_path) + "test_autotune_save_overwrite_mnist_serialized1.json")
|
||||
ds.serialize(data1, str(tmp_path / "test_autotune_save_overwrite_mnist_serialized1.json"))
|
||||
|
||||
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||
pass
|
||||
|
@ -158,14 +170,14 @@ class TestAutotuneSaveLoad:
|
|||
ds.config.set_enable_autotune(False)
|
||||
|
||||
# Pipeline#2
|
||||
ds.config.set_enable_autotune(True, str(tmp_path) + at_final_json_filename)
|
||||
ds.config.set_enable_autotune(True, str(tmp_path / at_final_json_filename))
|
||||
|
||||
data1 = ds.MnistDataset(MNIST_DATA_DIR, num_samples=200)
|
||||
data1 = data1.map(operations=one_hot_encode, input_columns="label")
|
||||
data1 = data1.shuffle(40)
|
||||
data1 = data1.batch(batch_size=20, drop_remainder=False)
|
||||
|
||||
ds.serialize(data1, str(tmp_path) + "test_autotune_save_overwrite_mnist_serialized2.json")
|
||||
ds.serialize(data1, str(tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"))
|
||||
|
||||
for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||
pass
|
||||
|
@ -173,12 +185,14 @@ class TestAutotuneSaveLoad:
|
|||
ds.config.set_enable_autotune(False)
|
||||
|
||||
# Confirm 2nd serialized file is identical to final AutoTune config file.
|
||||
assert filecmp.cmp(str(tmp_path) + "test_autotune_save_overwrite_mnist_atfinal.json",
|
||||
str(tmp_path) + "test_autotune_save_overwrite_mnist_serialized2.json")
|
||||
file1 = tmp_path / "test_autotune_save_overwrite_mnist_atfinal.json"
|
||||
file2 = tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"
|
||||
assert data_pipeline_same(file1, file2)
|
||||
|
||||
# Confirm the serialized files for the 2 different pipelines are different
|
||||
assert not filecmp.cmp(str(tmp_path) + "test_autotune_save_overwrite_mnist_serialized1.json",
|
||||
str(tmp_path) + "test_autotune_save_overwrite_mnist_serialized2.json")
|
||||
file1 = tmp_path / "test_autotune_save_overwrite_mnist_serialized1.json"
|
||||
file2 = tmp_path / "test_autotune_save_overwrite_mnist_serialized2.json"
|
||||
assert not data_pipeline_same(file1, file2)
|
||||
|
||||
ds.config.set_seed(original_seed)
|
||||
ds.config.set_enable_autotune(original_autotune)
|
||||
|
|
Loading…
Reference in New Issue