From 1a6a42c08308e64dd42af0ef5609d2c89f493925 Mon Sep 17 00:00:00 2001 From: luoyang Date: Fri, 26 Nov 2021 15:59:31 +0800 Subject: [PATCH] Update minddata log message --- .../minddata/dataset/engine/data_schema.cc | 31 +++++----- .../engine/ir/datasetops/batch_node.cc | 13 ++-- .../engine/ir/datasetops/concat_node.cc | 14 ++--- .../engine/ir/datasetops/dataset_node.cc | 15 ++--- .../engine/ir/datasetops/dataset_node.h | 5 +- .../dataset/engine/ir/datasetops/map_node.cc | 21 +++---- .../engine/ir/datasetops/project_node.cc | 6 +- .../engine/ir/datasetops/rename_node.cc | 10 +-- .../engine/ir/datasetops/repeat_node.cc | 6 +- .../engine/ir/datasetops/shuffle_node.cc | 6 +- .../dataset/engine/ir/datasetops/skip_node.cc | 4 +- .../ir/datasetops/source/ag_news_node.cc | 17 ++---- .../engine/ir/datasetops/source/album_node.cc | 21 +++---- .../ir/datasetops/source/celeba_node.cc | 30 ++++----- .../ir/datasetops/source/cifar100_node.cc | 15 +++-- .../ir/datasetops/source/cifar10_node.cc | 15 +++-- .../engine/ir/datasetops/source/clue_node.cc | 41 +++++-------- .../engine/ir/datasetops/source/coco_node.cc | 36 ++++------- .../engine/ir/datasetops/source/csv_node.cc | 36 +++++------ .../ir/datasetops/source/dbpedia_node.cc | 13 ++-- .../ir/datasetops/source/fake_image_node.cc | 24 +++----- .../ir/datasetops/source/flickr_node.cc | 18 +++--- .../ir/datasetops/source/image_folder_node.cc | 19 +++--- .../ir/datasetops/source/manifest_node.cc | 33 ++++------ .../ir/datasetops/source/minddata_node.cc | 29 +++++---- .../engine/ir/datasetops/source/mnist_node.cc | 15 +++-- .../ir/datasetops/source/qmnist_node.cc | 18 +++--- .../ir/datasetops/source/random_node.cc | 14 ++--- .../source/samplers/distributed_sampler_ir.cc | 14 ++--- .../source/samplers/mindrecord_sampler_ir.cc | 2 +- .../source/samplers/pk_sampler_ir.cc | 6 +- .../source/samplers/prebuilt_sampler_ir.cc | 8 ++- .../source/samplers/random_sampler_ir.cc | 9 +-- .../datasetops/source/samplers/samplers_ir.h | 1 + 
.../source/samplers/sequential_sampler_ir.cc | 7 ++- .../samplers/subset_random_sampler_ir.cc | 6 +- .../source/samplers/subset_sampler_ir.cc | 6 +- .../samplers/weighted_random_sampler_ir.cc | 15 +++-- .../engine/ir/datasetops/source/sbu_node.cc | 10 +-- .../ir/datasetops/source/text_file_node.cc | 31 ++++------ .../ir/datasetops/source/tf_record_node.cc | 61 ++++--------------- .../engine/ir/datasetops/source/usps_node.cc | 13 ++-- .../engine/ir/datasetops/source/voc_node.cc | 37 +++++------ .../dataset/engine/ir/datasetops/take_node.cc | 4 +- .../engine/ir/datasetops/transfer_node.cc | 20 +++--- .../dataset/engine/python_runtime_context.cc | 6 +- .../dataset/engine/runtime_context.cc | 6 +- .../ccsrc/minddata/dataset/engine/serdes.cc | 12 ++-- .../minddata/dataset/kernels/ir/validators.h | 13 +++- .../ccsrc/minddata/dataset/util/validators.h | 8 +-- .../dataset/test_datasets_imagefolder.py | 2 +- 51 files changed, 365 insertions(+), 457 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc index e059956b072..17e50bc8db0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc @@ -220,7 +220,7 @@ Status DataSchema::ColumnOrderLoad(nlohmann::json column_tree, const std::vector // Find the column in the json document auto column_info = column_tree.find(common::SafeCStr(curr_col_name)); if (column_info == column_tree.end()) { - RETURN_STATUS_UNEXPECTED("Invalid data, failed to find column name: " + curr_col_name + " in given json file."); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to find column: " + curr_col_name + " in JSON schema file."); } // At this point, columnInfo.value() is the subtree in the json document that contains // all of the data for a given column. This data will formulate our schema column. 
@@ -238,7 +238,8 @@ Status DataSchema::ColumnOrderLoad(nlohmann::json column_tree, const std::vector for (const auto &it_child : column_tree.items()) { auto name = it_child.value().find("name"); if (name == it_child.value().end()) { - RETURN_STATUS_UNEXPECTED("Invalid data, \"name\" field is missing for column: " + curr_col_name); + RETURN_STATUS_UNEXPECTED("Invalid data, \"name\" field is missing for column: " + curr_col_name + + " in JSON schema file."); } if (name.value() == curr_col_name) { index = i; @@ -247,7 +248,7 @@ Status DataSchema::ColumnOrderLoad(nlohmann::json column_tree, const std::vector i++; } if (index == -1) { - RETURN_STATUS_UNEXPECTED("Invalid data, failed to find column name: " + curr_col_name + " in given json file."); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to find column: " + curr_col_name + " in JSON schema file."); } nlohmann::json column_child_tree = column_tree[index]; RETURN_IF_NOT_OK(ColumnLoad(column_child_tree, curr_col_name)); @@ -301,14 +302,12 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin } if (!name.empty()) { if (!col_name.empty() && col_name != name) { - std::string err_msg = - "Invalid data, json schema file for column " + col_name + " has column name that does not match columnsToLoad"; + std::string err_msg = "Invalid data, failed to find column: " + col_name + " in JSON schema file."; RETURN_STATUS_UNEXPECTED(err_msg); } } else { if (col_name.empty()) { - std::string err_msg = - "Invalid data, json schema file for column " + col_name + " has invalid or missing column name."; + std::string err_msg = "Invalid data, \"name\" field is missing for column " + col_name + " in JSON schema file."; RETURN_STATUS_UNEXPECTED(err_msg); } else { name = col_name; @@ -317,12 +316,12 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin // data type is mandatory field if (type_str.empty()) return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, - 
"Invalid data, json schema file for column " + col_name + " has invalid or missing column type."); + "Invalid data, \"type\" field is missing for column " + col_name + " in JSON schema file."); // rank number is mandatory field if (rank_value <= -1) return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, - "Invalid data, json schema file for column " + col_name + " must define a positive rank value."); + "Invalid data, \"rank\" field of column " + col_name + " must have value >= 0 in JSON schema file."); // Create the column descriptor for this column from the data we pulled from the json file TensorShape col_shape = TensorShape(tmp_shape); @@ -349,12 +348,13 @@ Status DataSchema::LoadSchemaFile(const std::string &schema_file_path, num_rows_ = 0; } catch (nlohmann::json::exception &e) { in.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, unable to parse \"numRows\" from schema file: " + schema_file_path); + RETURN_STATUS_UNEXPECTED("Invalid data, unable to parse \"numRows\" field from JSON schema file: " + + schema_file_path + ", check syntax with JSON tool."); } nlohmann::json column_tree = js.at("columns"); if (column_tree.empty()) { in.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, \"columns\" field is missing in schema file: " + schema_file_path); + RETURN_STATUS_UNEXPECTED("Invalid data, \"columns\" field is missing in JSON schema file: " + schema_file_path); } if (columns_to_load.empty()) { // Parse the json tree and load the schema's columns in whatever order that the json @@ -375,7 +375,8 @@ Status DataSchema::LoadSchemaFile(const std::string &schema_file_path, in.close(); } catch (const std::exception &err) { // Catch any exception and convert to Status return code - RETURN_STATUS_UNEXPECTED("Invalid file, failed to load and parse schema file: " + schema_file_path); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to load and parse JSON schema file: " + schema_file_path + + ", check syntax with JSON tools."); } return Status::OK(); } @@ 
-389,7 +390,7 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string, num_rows_ = js.value("numRows", 0); nlohmann::json column_tree = js.at("columns"); if (column_tree.empty()) { - RETURN_STATUS_UNEXPECTED("Invalid data, \"columns\" field is missing in schema string."); + RETURN_STATUS_UNEXPECTED("Invalid data, \"columns\" field is missing in JSON schema string."); } if (columns_to_load.empty()) { // Parse the json tree and load the schema's columns in whatever order that the json @@ -404,7 +405,7 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string, } } catch (const std::exception &err) { // Catch any exception and convert to Status return code - RETURN_STATUS_UNEXPECTED("Invalid data, failed to load and parse schema string."); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to load and parse JSON schema string, check syntax with JSON tool."); } return Status::OK(); } @@ -446,7 +447,7 @@ Status DataSchema::PreLoadExceptionCheck(const nlohmann::json &js) { // Check if columns node exists. It is required for building schema from file. 
if (js.find("columns") == js.end()) return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, - "Invalid data, \"columns\" node is required in the schema json file."); + "Invalid data, \"columns\" field is missing in the JSON schema file."); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc index 220616dc2bb..8e3bbd68441 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/batch_node.cc @@ -72,18 +72,18 @@ void BatchNode::Print(std::ostream &out) const { Status BatchNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); if (batch_size_ <= 0) { - std::string err_msg = "BatchNode: batch_size should be positive integer, but got: " + std::to_string(batch_size_); + std::string err_msg = "Batch: 'batch_size' should be positive integer, but got: " + std::to_string(batch_size_); LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } #ifdef ENABLE_PYTHON if (batch_map_func_ && pad_) { - std::string err_msg = "BatchNode: per_batch_map and pad should not be used at the same time."; + std::string err_msg = "Batch: 'per_batch_map' and 'pad_info' should not be used at the same time."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } if (batch_map_func_ && in_col_names_.empty()) { - std::string err_msg = "BatchNode: in_col_names cannot be empty when per_batch_map is used."; + std::string err_msg = "Batch: 'in_col_names' cannot be empty when per_batch_map is used."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } #endif @@ -169,10 +169,9 @@ Status BatchNode::to_json(nlohmann::json *out_json) { Status BatchNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("batch_size") != json_obj.end(), "Failed to find batch_size"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("drop_remainder") != json_obj.end(), "Failed to find drop_remainder"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kBatchNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "batch_size", kBatchNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "drop_remainder", kBatchNode)); int32_t batch_size = json_obj["batch_size"]; bool drop_remainder = json_obj["drop_remainder"]; *result = std::make_shared(ds, batch_size, drop_remainder); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.cc index 2bce17211ec..2e5ec849c01 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/concat_node.cc @@ -56,19 +56,19 @@ Status ConcatNode::ValidateParams() { constexpr size_t kMinChildrenSize = 2; RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); if (children_.size() < kMinChildrenSize) { - std::string err_msg = "ConcatNode: concatenated datasets are not specified."; + std::string err_msg = "Concat: concatenated datasets are not specified."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } if (find(children_.begin(), children_.end(), nullptr) != children_.end()) { - std::string err_msg = "ConcatNode: concatenated datasets should not be null."; + std::string err_msg = "Concat: concatenated datasets should not be null."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } // Either one of children_flag_and_nums_ or children_start_end_index_ should be non-empty. 
if ((children_flag_and_nums_.empty() && !children_start_end_index_.empty()) || (!children_flag_and_nums_.empty() && children_start_end_index_.empty())) { - std::string err_msg = "ConcatNode: children_flag_and_nums and children_start_end_index should be used together"; + std::string err_msg = "Concat: 'children_flag_and_nums' and 'children_start_end_index' should be used together"; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } return Status::OK(); @@ -162,11 +162,9 @@ Status ConcatNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status ConcatNode::from_json(nlohmann::json json_obj, std::vector> datasets, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("children_flag_and_nums") != json_obj.end(), - "Failed to find children_flag_and_nums"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("children_start_end_index") != json_obj.end(), - "Failed to find children_start_end_index"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kConcatNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "children_flag_and_nums", kConcatNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "children_start_end_index", kConcatNode)); std::shared_ptr sampler; RETURN_IF_NOT_OK(Serdes::ConstructSampler(json_obj["sampler"], &sampler)); std::vector> children_flag_and_nums = json_obj["children_flag_and_nums"]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc index 7f689c689cb..21658d59136 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc @@ -94,7 +94,8 @@ Status ValidateDatasetDirParam(const std::string &dataset_name, std::string data } // Helper function to validate dataset files parameter -Status 
ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector &dataset_files) { +Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector &dataset_files, + const std::string &file_name) { if (dataset_files.empty()) { std::string err_msg = dataset_name + ": dataset_files is not specified."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); @@ -103,11 +104,11 @@ Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vec for (auto f : dataset_files) { Path dataset_file(f); if (!dataset_file.Exists()) { - std::string err_msg = dataset_name + ": dataset file: [" + f + "] is invalid or does not exist."; + std::string err_msg = dataset_name + ": " + file_name + ": [" + f + "] is invalid or does not exist."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } if (access(dataset_file.ToString().c_str(), R_OK) == -1) { - std::string err_msg = dataset_name + ": No access to specified dataset file: " + f; + std::string err_msg = dataset_name + ": No access to specified " + file_name + ": " + f; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } } @@ -158,12 +159,12 @@ Status ValidateStringValue(const std::string &dataset_name, const std::string &s Status ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param, const std::vector &columns) { if (columns.empty()) { - std::string err_msg = dataset_name + ":" + column_param + " should not be empty string"; + std::string err_msg = dataset_name + ": '" + column_param + "' should not be empty string"; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } for (uint32_t i = 0; i < columns.size(); ++i) { if (columns[i].empty()) { - std::string err_msg = dataset_name + ":" + column_param + "[" + std::to_string(i) + "] must not be empty"; + std::string err_msg = dataset_name + ": '" + column_param + "' [" + std::to_string(i) + "] must not be empty"; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } } @@ -171,8 +172,8 @@ Status ValidateDatasetColumnParam(const 
std::string &dataset_name, const std::st for (auto &column_name : columns) { auto result = columns_set.insert(column_name); if (result.second == false) { - std::string err_msg = dataset_name + ":" + column_param + - ": Invalid parameter, duplicate column names are not allowed: " + *result.first; + std::string err_msg = dataset_name + ": '" + column_param + + "' : Invalid parameter, duplicate column names are not allowed: " + *result.first; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h index 0494427a705..473e6ab518e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h @@ -38,8 +38,10 @@ #include "minddata/dataset/engine/ir/cache/dataset_cache.h" #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h" #include "minddata/dataset/include/dataset/datasets.h" +#include "minddata/dataset/kernels/ir/validators.h" #include "minddata/dataset/util/path.h" #include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/validators.h" namespace mindspore { namespace dataset { @@ -110,7 +112,8 @@ Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, in int32_t connector_que_size, std::shared_ptr *shuffle_op); // Helper function to validate dataset files parameter -Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector &dataset_files); +Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector &dataset_files, + const std::string &file_name = "dataset file"); // Helper function to validate dataset num_shards and shard_id parameters Status ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_shards, int32_t shard_id); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc 
b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc index 97c1ddf9895..0052f33409a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc @@ -103,27 +103,27 @@ Status MapNode::Build(std::vector> *const node_ops) { Status MapNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); if (operations_.empty()) { - std::string err_msg = "MapNode: No operation is specified."; + std::string err_msg = "Map: No 'operations' are specified."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } for (const auto &op : operations_) { if (op == nullptr) { - std::string err_msg = "MapNode: operation must not be nullptr."; + std::string err_msg = "Map: 'operations' must not be nullptr."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } else { RETURN_IF_NOT_OK(op->ValidateParams()); } } if (!input_columns_.empty()) { - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("MapNode", "input_columns", input_columns_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Map", "input_columns", input_columns_)); } if (!output_columns_.empty()) { - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("MapNode", "output_columns", output_columns_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Map", "output_columns", output_columns_)); } if (!project_columns_.empty()) { - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("MapNode", "project_columns", project_columns_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Map", "project_columns", project_columns_)); } return Status::OK(); @@ -191,12 +191,11 @@ Status MapNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status MapNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Failed to find 
input_columns"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Failed to find output_columns"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("project_columns") != json_obj.end(), "Failed to find project_columns"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("operations") != json_obj.end(), "Failed to find operations"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kMapNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "input_columns", kMapNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "output_columns", kMapNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "project_columns", kMapNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "operations", kMapNode)); std::vector input_columns = json_obj["input_columns"]; std::vector output_columns = json_obj["output_columns"]; std::vector project_columns = json_obj["project_columns"]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc index 9ac043567df..228a90aee49 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/project_node.cc @@ -42,11 +42,11 @@ void ProjectNode::Print(std::ostream &out) const { out << (Name() + "(column: " Status ProjectNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); if (columns_.empty()) { - std::string err_msg = "ProjectNode: No columns are specified."; + std::string err_msg = "Project: No 'columns' are specified."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("ProjectNode", "columns", columns_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Project", "columns", columns_)); return Status::OK(); } @@ -68,7 +68,7 @@ Status ProjectNode::to_json(nlohmann::json *out_json) { Status ProjectNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, 
std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns") != json_obj.end(), "Failed to find columns"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "columns", kProjectNode)); std::vector columns = json_obj["columns"]; *result = std::make_shared(ds, columns); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc index d755839206e..736bf6a932a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/rename_node.cc @@ -45,13 +45,13 @@ void RenameNode::Print(std::ostream &out) const { Status RenameNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); if (input_columns_.size() != output_columns_.size()) { - std::string err_msg = "RenameNode: input and output columns must be the same size"; + std::string err_msg = "Rename: 'input columns' and 'output columns' must have the same size."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("RenameNode", "input_columns", input_columns_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Rename", "input_columns", input_columns_)); - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("RenameNode", "output_columns", output_columns_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("Rename", "output_columns", output_columns_)); return Status::OK(); } @@ -74,8 +74,8 @@ Status RenameNode::to_json(nlohmann::json *out_json) { Status RenameNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Failed to find input_columns"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Failed to find output_columns"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "input_columns", kRenameNode)); + 
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "output_columns", kRenameNode)); std::vector input_columns = json_obj["input_columns"]; std::vector output_columns = json_obj["output_columns"]; *result = std::make_shared(ds, input_columns, output_columns); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.cc index a71cae4640a..d2214e902f2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/repeat_node.cc @@ -60,8 +60,8 @@ Status RepeatNode::Build(std::vector> *const node_ops Status RepeatNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); if (repeat_count_ <= 0 && repeat_count_ != -1) { - std::string err_msg = "RepeatNode: repeat_count should be either -1 or positive integer, repeat_count_: " + - std::to_string(repeat_count_); + std::string err_msg = + "Repeat: 'repeat_count' should be either -1 or positive integer, but got: " + std::to_string(repeat_count_); LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } @@ -106,7 +106,7 @@ Status RepeatNode::to_json(nlohmann::json *out_json) { Status RepeatNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "count", kRepeatNode)); int32_t count = json_obj["count"]; *result = std::make_shared(ds, count); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc index 22429356e8f..08a30541568 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc @@ -54,11 +54,7 @@ Status ShuffleNode::Build(std::vector> *const node_op // 
Function to validate the parameters for ShuffleNode Status ShuffleNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - if (shuffle_size_ <= 1) { - std::string err_msg = "ShuffleNode: Invalid input, shuffle_size: " + std::to_string(shuffle_size_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - + RETURN_IF_NOT_OK(ValidateScalar("Shuffle", "shuffle_size", shuffle_size_, {1}, true)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc index e44058f9a09..433bf98fc34 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/skip_node.cc @@ -50,7 +50,7 @@ Status SkipNode::Build(std::vector> *const node_ops) Status SkipNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); if (skip_count_ <= -1) { - std::string err_msg = "SkipNode: skip_count should not be negative, skip_count: " + std::to_string(skip_count_); + std::string err_msg = "Skip: 'skip_count' should not be negative, but got: " + std::to_string(skip_count_); LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } @@ -95,7 +95,7 @@ Status SkipNode::to_json(nlohmann::json *out_json) { Status SkipNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "count", kSkipNode)); int32_t count = json_obj["count"]; *result = std::make_shared(ds, count); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/ag_news_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/ag_news_node.cc index 004d772fd2e..27261dd12a5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/ag_news_node.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/ag_news_node.cc @@ -55,20 +55,13 @@ void AGNewsNode::Print(std::ostream &out) const { Status AGNewsNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("AGNewsNode", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateStringValue("AGNewsNode", usage_, {"train", "test", "all"})); - if (num_samples_ < 0) { - std::string err_msg = "AGNewsNode: Invalid number of samples: " + std::to_string(num_samples_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - if (num_shards_ < 1) { - std::string err_msg = "AGNewsNode: Invalid number of shards: " + std::to_string(num_shards_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - RETURN_IF_NOT_OK(ValidateDatasetShardParams("AGNewsNode", num_shards_, shard_id_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("AGNewsDataset", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateStringValue("AGNewsDataset", usage_, {"train", "test", "all"})); + RETURN_IF_NOT_OK(ValidateScalar("AGNewsDataset", "num_samples", num_samples_, {0}, false)); + RETURN_IF_NOT_OK(ValidateDatasetShardParams("AGNewsDataset", num_shards_, shard_id_)); if (!column_names_.empty()) { - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("AGNewsNode", "column_names", column_names_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("AGNewsDataset", "column_names", column_names_)); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc index 9b8fc05ecf3..de82d4b7aaa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc @@ -54,14 +54,14 @@ void AlbumNode::Print(std::ostream &out) const { Status AlbumNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - 
RETURN_IF_NOT_OK(ValidateDatasetDirParam("AlbumNode", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("AlbumDataset", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetFilesParam("AlbumNode", {schema_path_})); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("AlbumDataset", {schema_path_})); - RETURN_IF_NOT_OK(ValidateDatasetSampler("AlbumNode", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("AlbumDataset", sampler_)); if (!column_names_.empty()) { - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("AlbumNode", "column_names", column_names_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("AlbumDataset", "column_names", column_names_)); } return Status::OK(); @@ -148,13 +148,12 @@ Status AlbumNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status AlbumNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("data_schema") != json_obj.end(), "Failed to find data_schema"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Failed to find column_names"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kAlbumNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kAlbumNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "data_schema", kAlbumNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "column_names", kAlbumNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "decode", kAlbumNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kAlbumNode)); std::string 
dataset_dir = json_obj["dataset_dir"]; std::string data_schema = json_obj["data_schema"]; std::vector column_names = json_obj["column_names"]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc index 122db8a84bf..664b608ed4e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc @@ -55,11 +55,11 @@ void CelebANode::Print(std::ostream &out) const { Status CelebANode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("CelebANode", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("CelebADataset", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("CelebANode", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("CelebADataset", sampler_)); - RETURN_IF_NOT_OK(ValidateStringValue("CelebANode", usage_, {"all", "train", "valid", "test"})); + RETURN_IF_NOT_OK(ValidateStringValue("CelebADataset", usage_, {"all", "train", "valid", "test"})); return Status::OK(); } @@ -99,8 +99,9 @@ Status CelebANode::GetDatasetSize(const std::shared_ptr &size auto realpath = FileUtils::GetRealPath((folder_path / "list_attr_celeba.txt").ToString().data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Get real path failed, path=" << (folder_path / "list_attr_celeba.txt").ToString(); - RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + (folder_path / "list_attr_celeba.txt").ToString()); + MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << (folder_path / "list_attr_celeba.txt").ToString(); + RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + + (folder_path / "list_attr_celeba.txt").ToString()); } std::ifstream attr_file(realpath.value()); @@ -138,8 +139,10 @@ Status CelebANode::GetDatasetSize(const std::shared_ptr &size if 
(!partition_file.is_open()) { auto realpath_eval = FileUtils::GetRealPath((folder_path / "list_eval_partition.txt").ToString().data()); if (!realpath_eval.has_value()) { - MS_LOG(ERROR) << "Get real path failed, path=" << (folder_path / "list_eval_partition.txt").ToString(); - RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + (folder_path / "list_eval_partition.txt").ToString()); + MS_LOG(ERROR) << "Invalid file, get real path failed, path=" + << (folder_path / "list_eval_partition.txt").ToString(); + RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + + (folder_path / "list_eval_partition.txt").ToString()); } partition_file.open(realpath_eval.value()); @@ -188,13 +191,12 @@ Status CelebANode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status CelebANode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Failed to find extension"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kCelebANode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kCelebANode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "usage", kCelebANode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kCelebANode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "decode", kCelebANode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "extensions", kCelebANode)); std::string 
dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc index 91148fcb1a6..048884bec64 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc @@ -47,11 +47,11 @@ void Cifar100Node::Print(std::ostream &out) const { Status Cifar100Node::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("Cifar100Node", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("Cifar100Dataset", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("Cifar100Node", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("Cifar100Dataset", sampler_)); - RETURN_IF_NOT_OK(ValidateStringValue("Cifar100Node", usage_, {"train", "test", "all"})); + RETURN_IF_NOT_OK(ValidateStringValue("Cifar100Dataset", usage_, {"train", "test", "all"})); return Status::OK(); } @@ -123,11 +123,10 @@ Status Cifar100Node::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status Cifar100Node::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kCifar100Node)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kCifar100Node)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "usage", 
kCifar100Node)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kCifar100Node)); std::string dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc index da66b62597d..2da1b871fb2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc @@ -47,11 +47,11 @@ void Cifar10Node::Print(std::ostream &out) const { Status Cifar10Node::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("Cifar10Node", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("Cifar10Dataset", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("Cifar10Node", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("Cifar10Dataset", sampler_)); - RETURN_IF_NOT_OK(ValidateStringValue("Cifar10Node", usage_, {"train", "test", "all"})); + RETURN_IF_NOT_OK(ValidateStringValue("Cifar10Dataset", usage_, {"train", "test", "all"})); return Status::OK(); } @@ -124,11 +124,10 @@ Status Cifar10Node::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status Cifar10Node::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kCifar10Node)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, 
"dataset_dir", kCifar10Node)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "usage", kCifar10Node)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kCifar10Node)); std::string dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc index 4cfb7f24017..ea5db97c65c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc @@ -50,23 +50,13 @@ void CLUENode::Print(std::ostream &out) const { Status CLUENode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetFilesParam("CLUENode", dataset_files_)); - - RETURN_IF_NOT_OK(ValidateStringValue("CLUENode", task_, {"AFQMC", "TNEWS", "IFLYTEK", "CMNLI", "WSC", "CSL"})); - - RETURN_IF_NOT_OK(ValidateStringValue("CLUENode", usage_, {"train", "test", "eval"})); - - if (shuffle_ != ShuffleMode::kFalse && shuffle_ != ShuffleMode::kFiles && shuffle_ != ShuffleMode::kGlobal) { - std::string err_msg = "CLUENode: Invalid ShuffleMode, check input value of enum."; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - if (num_samples_ < 0) { - std::string err_msg = "CLUENode: Invalid number of samples: " + std::to_string(num_samples_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - RETURN_IF_NOT_OK(ValidateDatasetShardParams("CLUENode", num_shards_, shard_id_)); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("CLUEDataset", dataset_files_)); + RETURN_IF_NOT_OK(ValidateStringValue("CLUEDataset", task_, {"AFQMC", "TNEWS", "IFLYTEK", "CMNLI", "WSC", "CSL"})); + RETURN_IF_NOT_OK(ValidateStringValue("CLUEDataset", usage_, {"train", "test", "eval"})); + RETURN_IF_NOT_OK(ValidateEnum("CLUEDataset", "ShuffleMode", shuffle_, + {ShuffleMode::kFalse, 
ShuffleMode::kFiles, ShuffleMode::kGlobal})); + RETURN_IF_NOT_OK(ValidateScalar("CLUEDataset", "num_samples", num_samples_, {0}, false)); + RETURN_IF_NOT_OK(ValidateDatasetShardParams("CLUEDataset", num_shards_, shard_id_)); return Status::OK(); } @@ -250,15 +240,14 @@ Status CLUENode::to_json(nlohmann::json *out_json) { } Status CLUENode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kCLUENode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kCLUENode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "task", kCLUENode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "usage", kCLUENode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_samples", kCLUENode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shuffle", kCLUENode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_shards", kCLUENode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shard_id", kCLUENode)); std::vector dataset_files = json_obj["dataset_dir"]; std::string task = json_obj["task"]; std::string usage = json_obj["usage"]; diff 
--git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc index 46e6f82dce6..37fda7147f3 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc @@ -53,21 +53,10 @@ void CocoNode::Print(std::ostream &out) const { out << Name(); } Status CocoNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("CocoNode", dataset_dir_)); - - RETURN_IF_NOT_OK(ValidateDatasetSampler("CocoNode", sampler_)); - - Path annotation_file(annotation_file_); - if (!annotation_file.Exists()) { - std::string err_msg = "CocoNode: annotation_file is invalid or does not exist."; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - if (access(annotation_file_.c_str(), R_OK) == -1) { - std::string err_msg = "CocoNode: No access to specified annotation file: " + annotation_file_; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - RETURN_IF_NOT_OK(ValidateStringValue("CocoNode", task_, {"Detection", "Stuff", "Panoptic", "Keypoint"})); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("CocoDataset", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("CocoDataset", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("CocoDataset", {annotation_file_}, "annotation file")); + RETURN_IF_NOT_OK(ValidateStringValue("CocoDataset", task_, {"Detection", "Stuff", "Panoptic", "Keypoint"})); return Status::OK(); } @@ -164,7 +153,7 @@ Status CocoNode::GetDatasetSize(const std::shared_ptr &size_g int64_t num_rows = 0, sample_size; std::vector> ops; RETURN_IF_NOT_OK(Build(&ops)); - CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Unable to build CocoOp."); + CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "[Internal ERROR] Unable to build CocoOp."); auto op = std::dynamic_pointer_cast(ops.front()); RETURN_IF_NOT_OK(op->CountTotalRows(&num_rows)); 
std::shared_ptr sampler_rt = nullptr; @@ -199,14 +188,13 @@ Status CocoNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status CocoNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("annotation_file") != json_obj.end(), "Failed to find annotation_file"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extra_metadata") != json_obj.end(), "Failed to find extra_metadata"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kCocoNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kCocoNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "annotation_file", kCocoNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "task", kCocoNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "decode", kCocoNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kCocoNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "extra_metadata", kCocoNode)); std::string dataset_dir = json_obj["dataset_dir"]; std::string annotation_file = json_obj["annotation_file"]; std::string task = json_obj["task"]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc index 3b4556c2f32..b456bd80e17 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc @@ -66,25 +66,18 @@ Status CSVNode::ValidateParams() { LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } - if (shuffle_ != ShuffleMode::kFalse && shuffle_ != ShuffleMode::kFiles && shuffle_ != ShuffleMode::kGlobal) { - std::string err_msg = "CSVNode: Invalid ShuffleMode, check input value of enum."; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - if (num_samples_ < 0) { - std::string err_msg = "CSVNode: Invalid number of samples: " + std::to_string(num_samples_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - RETURN_IF_NOT_OK(ValidateDatasetShardParams("CSVNode", num_shards_, shard_id_)); + RETURN_IF_NOT_OK(ValidateEnum("CSVDataset", "ShuffleMode", shuffle_, + {ShuffleMode::kFalse, ShuffleMode::kFiles, ShuffleMode::kGlobal})); + RETURN_IF_NOT_OK(ValidateScalar("CSVDataset", "num_samples", num_samples_, {0}, false)); + RETURN_IF_NOT_OK(ValidateDatasetShardParams("CSVDataset", num_shards_, shard_id_)); if (find(column_defaults_.begin(), column_defaults_.end(), nullptr) != column_defaults_.end()) { - std::string err_msg = "CSVNode: column_default should not be null."; + std::string err_msg = "CSVDataset: column_default should not be null."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } if (!column_names_.empty()) { - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("CSVNode", "column_names", column_names_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("CSVDataset", "column_names", column_names_)); } return Status::OK(); @@ -187,15 +180,14 @@ Status CSVNode::to_json(nlohmann::json *out_json) { } Status CSVNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("field_delim") != json_obj.end(), 
"Failed to find field_delim"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Failed to find column_names"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kCSVNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_files", kCSVNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "field_delim", kCSVNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "column_names", kCSVNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_samples", kCSVNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shuffle", kCSVNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_shards", kCSVNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shard_id", kCSVNode)); std::vector dataset_files = json_obj["dataset_files"]; std::string field_delim = json_obj["field_delim"]; std::vector> column_defaults = {}; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/dbpedia_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/dbpedia_node.cc index ddbd732ccda..fba36d2cff7 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/dbpedia_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/dbpedia_node.cc @@ -56,15 +56,10 @@ void DBpediaNode::Print(std::ostream &out) const { Status DBpediaNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("DBpediaNode", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateStringValue("DBpediaNode", usage_, {"train", 
"test", "all"})); - - if (num_samples_ < 0) { - std::string err_msg = "DBpediaNode: Invalid number of samples: " + std::to_string(num_samples_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - RETURN_IF_NOT_OK(ValidateDatasetShardParams("DBpediaNode", num_shards_, shard_id_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("DBpediaDataset", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateStringValue("DBpediaDataset", usage_, {"train", "test", "all"})); + RETURN_IF_NOT_OK(ValidateScalar("DBpediaDataset", "num_samples", num_samples_, {0}, false)); + RETURN_IF_NOT_OK(ValidateDatasetShardParams("DBpediaDataset", num_shards_, shard_id_)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/fake_image_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/fake_image_node.cc index a250b360601..6cd51581085 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/fake_image_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/fake_image_node.cc @@ -49,29 +49,21 @@ void FakeImageNode::Print(std::ostream &out) const { Status FakeImageNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetSampler("FakeImageNode", sampler_)); - if (num_images_ <= 0) { - std::string err_msg = "FakeImageNode: num_images must be greater than 0, but got: " + std::to_string(num_images_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } + RETURN_IF_NOT_OK(ValidateDatasetSampler("FakeImageDataset", sampler_)); + RETURN_IF_NOT_OK(ValidateScalar("FakeImageDataset", "num_images", num_images_, {0}, true)); if (image_size_.size() != 3) { - std::string err_msg = - "FakeImageNode: image_size expecting size 3, but got image_size.size(): " + std::to_string(image_size_.size()); + std::string err_msg = "FakeImageDataset: 'image_size' expecting size 3, but got image_size.size(): " + + std::to_string(image_size_.size()); 
LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } for (auto i = 0; i < 3; i++) { - if (image_size_[i] <= 0) { - std::string err_msg = "FakeImageNode: image_size[" + std::to_string(i) + - "] must be greater than 0, but got: " + std::to_string(image_size_[i]); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - } - if (num_classes_ <= 0) { - std::string err_msg = "FakeImageNode: num_classes must be greater than 0, but got: " + std::to_string(num_classes_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); + RETURN_IF_NOT_OK( + ValidateScalar("FakeImageDataset", "image_size[" + std::to_string(i) + "]", image_size_[i], {0}, true)); } + + RETURN_IF_NOT_OK(ValidateScalar("FakeImageDataset", "num_classes", num_classes_, {0}, true)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/flickr_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/flickr_node.cc index 56ea62275eb..7ff09bcf280 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/flickr_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/flickr_node.cc @@ -59,10 +59,10 @@ void FlickrNode::Print(std::ostream &out) const { Status FlickrNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("FlickrNode", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("FlickrDataset", dataset_dir_)); if (annotation_file_.empty()) { - std::string err_msg = "FlickrNode: annotation_file is not specified."; + std::string err_msg = "FlickrDataset: 'annotation_file' is not specified."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } @@ -70,17 +70,14 @@ Status FlickrNode::ValidateParams() { for (char c : annotation_file_) { auto p = std::find(forbidden_symbols.begin(), forbidden_symbols.end(), c); if (p != forbidden_symbols.end()) { - std::string err_msg = "FlickrNode: annotation_file: [" + annotation_file_ + "] should not contain :*?\"<>|`&;\'."; + std::string 
err_msg = + "FlickrDataset: 'annotation_file': [" + annotation_file_ + "] should not contain :*?\"<>|`&;\'."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } } - Path annotation_file(annotation_file_); - if (!annotation_file.Exists()) { - std::string err_msg = "FlickrNode: annotation_file: [" + annotation_file_ + "] is invalid or not exist."; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - RETURN_IF_NOT_OK(ValidateDatasetSampler("FlickrNode", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("FlickrDataset", {annotation_file_}, "annotation file")); + RETURN_IF_NOT_OK(ValidateDatasetSampler("FlickrDataset", sampler_)); return Status::OK(); } @@ -149,8 +146,7 @@ Status FlickrNode::to_json(nlohmann::json *out_json) { } Status FlickrNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kFlickrNode)); CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("annotation_file") != json_obj.end(), "Failed to find annotation_file"); CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc index 7e64c0f0d1f..cb8abe9cac2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc @@ -57,9 +57,9 @@ void ImageFolderNode::Print(std::ostream &out) const { Status ImageFolderNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("ImageFolderNode", dataset_dir_)); + 
RETURN_IF_NOT_OK(ValidateDatasetDirParam("ImageFolderDataset", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("ImageFolderNode", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("ImageFolderDataset", sampler_)); return Status::OK(); } @@ -131,14 +131,13 @@ Status ImageFolderNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status ImageFolderNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("recursive") != json_obj.end(), "Failed to find recursive"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Failed to find extension"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kImageFolderNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kImageFolderNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "decode", kImageFolderNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kImageFolderNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "recursive", kImageFolderNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "extensions", kImageFolderNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "class_indexing", kImageFolderNode)); std::string dataset_dir = json_obj["dataset_dir"]; bool decode = json_obj["decode"]; std::shared_ptr sampler; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc 
b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc index 3e48da72a0f..116e6d81118 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc @@ -65,24 +65,18 @@ Status ManifestNode::ValidateParams() { for (char c : dataset_file_) { auto p = std::find(forbidden_symbols.begin(), forbidden_symbols.end(), c); if (p != forbidden_symbols.end()) { - std::string err_msg = "ManifestNode: filename should not contain :*?\"<>|`&;\'"; + std::string err_msg = + "ManifestDataset: filename of 'dataset_file' should not contain :*?\"<>|`&;\', check dataset_file: " + + dataset_file_; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } } - Path manifest_file(dataset_file_); - if (!manifest_file.Exists()) { - std::string err_msg = "ManifestNode: dataset file: [" + dataset_file_ + "] is invalid or not exist"; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - if (access(dataset_file_.c_str(), R_OK) == -1) { - std::string err_msg = "ManifestNode: No access to specified annotation file: " + dataset_file_; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("ManifestDataset", {dataset_file_}, "annotation file")); - RETURN_IF_NOT_OK(ValidateDatasetSampler("ManifestNode", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("ManifestDataset", sampler_)); - RETURN_IF_NOT_OK(ValidateStringValue("ManifestNode", usage_, {"train", "eval", "inference"})); + RETURN_IF_NOT_OK(ValidateStringValue("ManifestDataset", usage_, {"train", "eval", "inference"})); return Status::OK(); } @@ -125,7 +119,7 @@ Status ManifestNode::GetDatasetSize(const std::shared_ptr &si int64_t num_rows, sample_size; std::vector> ops; RETURN_IF_NOT_OK(Build(&ops)); - CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Unable to build op."); + CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "[Internal ERROR] Unable to build op."); auto op = 
std::dynamic_pointer_cast(ops.front()); RETURN_IF_NOT_OK(op->CountTotalRows(&num_rows)); std::shared_ptr sampler_rt = nullptr; @@ -160,13 +154,12 @@ Status ManifestNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status ManifestNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_file") != json_obj.end(), "Failed to find dataset_file"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kManifestNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_file", kManifestNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "usage", kManifestNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kManifestNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "class_indexing", kManifestNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "decode", kManifestNode)); std::string dataset_file = json_obj["dataset_file"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.cc index 70f98bfaf03..fc941c35df4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.cc @@ -80,40 +80,39 @@ Status 
MindDataNode::ValidateParams() { constexpr size_t max_len = 4096; if (!search_for_pattern_ && dataset_files_.size() > max_len) { std::string err_msg = - "MindDataNode: length of dataset_file must be less than or equal to 4096, dataset_file length: " + + "MindDataset: length of dataset_file must be less than or equal to 4096, dataset_file length: " + std::to_string(dataset_file_.size()); LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } - if (shuffle_mode_ != ShuffleMode::kFalse && shuffle_mode_ != ShuffleMode::kFiles && - shuffle_mode_ != ShuffleMode::kGlobal && shuffle_mode_ != ShuffleMode::kInfile) { - std::string err_msg = "MindDataNode: Invalid ShuffleMode, check input value of enum."; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } + RETURN_IF_NOT_OK( + ValidateEnum("MindDataset", "ShuffleMode", shuffle_mode_, + {ShuffleMode::kFalse, ShuffleMode::kFiles, ShuffleMode::kGlobal, ShuffleMode::kInfile})); std::vector dataset_file_vec = search_for_pattern_ ? std::vector{dataset_file_} : dataset_files_; - RETURN_IF_NOT_OK(ValidateDatasetFilesParam("MindDataNode", dataset_file_vec)); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("MindDataset", dataset_file_vec)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("MindDataNode", input_sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("MindDataset", input_sampler_)); if (!columns_list_.empty()) { - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("MindDataNode", "columns_list", columns_list_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("MindDataset", "columns_list", columns_list_)); } if (padded_sample_ != nullptr) { if (num_padded_ < 0 || num_padded_ > INT_MAX) { std::string err_msg = - "MindDataNode: num_padded must to be between 0 and INT32_MAX, but got: " + std::to_string(num_padded_); + "MindDataset: 'num_padded' must be between 0 and INT32_MAX, but got: " + std::to_string(num_padded_); LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } if (columns_list_.empty()) { - std::string err_msg = "MindDataNode: padded_sample is
specified and requires columns_list as well"; + std::string err_msg = "MindDataset: 'padded_sample' is specified and requires 'columns_list' as well"; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } for (std::string &column : columns_list_) { if (padded_sample_.find(column) == padded_sample_.end()) { - std::string err_msg = "MindDataNode: " + column + " in columns_list does not match any column in padded_sample"; + std::string err_msg = + "MindDataset: " + column + " in 'columns_list' does not match any column in 'padded_sample'"; MS_LOG(ERROR) << err_msg << ", padded_sample: " << padded_sample_; return Status(StatusCode::kMDSyntaxError, err_msg); } @@ -121,7 +120,7 @@ Status MindDataNode::ValidateParams() { } if (num_padded_ > 0) { if (padded_sample_ == nullptr) { - std::string err_msg = "MindDataNode: num_padded is specified but padded_sample is not"; + std::string err_msg = "MindDataset: num_padded is specified but padded_sample is not"; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } } @@ -136,7 +135,7 @@ Status MindDataNode::BuildMindDatasetSamplerChain(const std::shared_ptr op = sampler->BuildForMindDataset(); if (op == nullptr) { std::string err_msg = - "MindDataNode: Unsupported sampler is supplied for MindDataset. Supported sampler list: " + "MindDataset: Unsupported sampler is supplied for MindDataset. 
Supported sampler list: " "SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler and DistributedSampler"; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } @@ -149,7 +148,7 @@ Status MindDataNode::BuildMindDatasetSamplerChain(const std::shared_ptrGetNumSamples() != 0 && (op->GetShuffleMode() == ShuffleMode::kFiles || op->GetShuffleMode() == ShuffleMode::kInfile)) { std::string err_msg = - "MindDataNode: Shuffle.FILES or Shuffle.INFILE and num_samples cannot be specified at the same time."; + "MindDataset: Shuffle.kFiles or Shuffle.kInfile and num_samples cannot be specified at the same time."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc index 88c91a27ec2..b29535e2a1e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc @@ -44,11 +44,11 @@ void MnistNode::Print(std::ostream &out) const { out << Name(); } Status MnistNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("MnistNode", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("MnistDataset", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("MnistNode", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("MnistDataset", sampler_)); - RETURN_IF_NOT_OK(ValidateStringValue("MnistNode", usage_, {"train", "test", "all"})); + RETURN_IF_NOT_OK(ValidateStringValue("MnistDataset", usage_, {"train", "test", "all"})); return Status::OK(); } @@ -117,11 +117,10 @@ Status MnistNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status MnistNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kMnistNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kMnistNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "usage", kMnistNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kMnistNode)); std::string dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/qmnist_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/qmnist_node.cc index 7f0bfa8a741..c620ad86860 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/qmnist_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/qmnist_node.cc @@ -51,9 +51,10 @@ void QMnistNode::Print(std::ostream &out) const { Status QMnistNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("QMnistNode", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("QMnistNode", sampler_)); - RETURN_IF_NOT_OK(ValidateStringValue("QMnistNode", usage_, {"train", "test", "test10k", "test50k", "nist", "all"})); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("QMnistDataset", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("QMnistDataset", sampler_)); + RETURN_IF_NOT_OK( + ValidateStringValue("QMnistDataset", usage_, {"train", "test", "test10k", "test50k", "nist", "all"})); return Status::OK(); } @@ -128,12 +129,11 @@ Status QMnistNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status QMnistNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("compat") != json_obj.end(), "Failed to find compat"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kQMnistNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kQMnistNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "usage", kQMnistNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "compat", kQMnistNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kQMnistNode)); std::string dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; bool compat = json_obj["compat"]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc index b0286b87941..b4d0235e6b0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc @@ -41,20 +41,16 @@ void RandomNode::Print(std::ostream &out) const { // ValidateParams for RandomNode Status RandomNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - if (total_rows_ < 0) { - std::string err_msg = - "RandomNode: total_rows must be greater than or equal 0, now get " + std::to_string(total_rows_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } + RETURN_IF_NOT_OK(ValidateScalar("RandomDataset", "total_rows", total_rows_, {0}, false)); if (!columns_list_.empty()) { - RETURN_IF_NOT_OK(ValidateDatasetColumnParam("RandomNode", 
"columns_list", columns_list_)); + RETURN_IF_NOT_OK(ValidateDatasetColumnParam("RandomDataset", "columns_list", columns_list_)); } // allow total_rows == 0 for now because RandomOp would generate a random row when it gets a 0 CHECK_FAIL_RETURN_UNEXPECTED(total_rows_ == 0 || total_rows_ >= num_workers_, - "RandomNode needs total_rows >= num_workers, total_rows=" + std::to_string(total_rows_) + - ", num_workers=" + std::to_string(num_workers_) + "."); + "RandomDataset needs 'total_rows' >= 'num_workers', total_rows=" + + std::to_string(total_rows_) + ", num_workers=" + std::to_string(num_workers_) + "."); return Status::OK(); } @@ -72,7 +68,7 @@ Status RandomNode::Build(std::vector> *const node_ops if (!schema_path_.empty()) { schema_obj = Schema(schema_path_); if (schema_obj == nullptr) { - std::string err_msg = "RandomNode::Build : Invalid schema path"; + std::string err_msg = "RandomDataset: Invalid schema path, check schema path:" + schema_path_; MS_LOG(ERROR) << err_msg; RETURN_STATUS_UNEXPECTED(err_msg); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.cc index 41bb5b63284..b9fae5723e0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.cc @@ -110,12 +110,12 @@ Status DistributedSamplerObj::to_json(nlohmann::json *const out_json) { #ifndef ENABLE_ANDROID Status DistributedSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), 
"Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("seed") != json_obj.end(), "Failed to find seed"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("offset") != json_obj.end(), "Failed to find offset"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("even_dist") != json_obj.end(), "Failed to find even_dist"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_shards", "DistributedSampler")); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shard_id", "DistributedSampler")); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shuffle", "DistributedSampler")); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "seed", "DistributedSampler")); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "offset", "DistributedSampler")); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "even_dist", "DistributedSampler")); int64_t num_shards = json_obj["num_shards"]; int64_t shard_id = json_obj["shard_id"]; bool shuffle = json_obj["shuffle"]; @@ -135,7 +135,7 @@ std::shared_ptr DistributedSamplerObj::SamplerCopy() { std::make_shared(num_shards_, shard_id_, shuffle_, num_samples_, seed_, offset_, even_dist_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. 
Message: " << rc; } return sampler; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/mindrecord_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/mindrecord_sampler_ir.cc index d2babdf39a4..78eaf8e1f04 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/mindrecord_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/mindrecord_sampler_ir.cc @@ -45,7 +45,7 @@ std::shared_ptr MindRecordSamplerObj::SamplerCopy() { // Note this function can only be called after SamplerBuild is finished, and can only be called once. Otherwise this // function will return error status. Status MindRecordSamplerObj::GetShardReader(std::unique_ptr *shard_reader) { - CHECK_FAIL_RETURN_UNEXPECTED(shard_reader_ != nullptr, "Internal error. Attempt to get an empty shard reader."); + CHECK_FAIL_RETURN_UNEXPECTED(shard_reader_ != nullptr, "[Internal ERROR] Attempt to get an empty shard reader."); *shard_reader = std::move(shard_reader_); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.cc index a14ebd6b41a..c2d2dd142ba 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.cc @@ -62,8 +62,8 @@ Status PKSamplerObj::to_json(nlohmann::json *const out_json) { #ifndef ENABLE_ANDROID Status PKSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_val") != json_obj.end(), "Failed to find num_val"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_val", "PKSampler")); + 
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shuffle", "PKSampler")); int64_t num_val = json_obj["num_val"]; bool shuffle = json_obj["shuffle"]; *sampler = std::make_shared(num_val, shuffle, num_samples); @@ -100,7 +100,7 @@ std::shared_ptr PKSamplerObj::SamplerCopy() { auto sampler = std::make_shared(num_val_, shuffle_, num_samples_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. Message: " << rc; } return sampler; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/prebuilt_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/prebuilt_sampler_ir.cc index 125abe218ff..86d9dc023fd 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/prebuilt_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/prebuilt_sampler_ir.cc @@ -62,7 +62,9 @@ std::shared_ptr PreBuiltSamplerObj::SamplerCopy() { auto sampler = std::make_shared(sp_minddataset_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. Message: " << rc; + } } return sampler; } @@ -70,7 +72,9 @@ std::shared_ptr PreBuiltSamplerObj::SamplerCopy() { auto sampler = std::make_shared(sp_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. 
Message: " << rc; + } } return sampler; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.cc index 48b004b9b1c..495be00a011 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.cc @@ -58,9 +58,8 @@ Status RandomSamplerObj::to_json(nlohmann::json *const out_json) { #ifndef ENABLE_ANDROID Status RandomSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Failed to find replacement"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("reshuffle_each_epoch") != json_obj.end(), - "Failed to find reshuffle_each_epoch"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "replacement", "RandomSampler")); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "reshuffle_each_epoch", "RandomSampler")); bool replacement = json_obj["replacement"]; bool reshuffle_each_epoch = json_obj["reshuffle_each_epoch"]; *sampler = std::make_shared(replacement, num_samples, reshuffle_each_epoch); @@ -92,7 +91,9 @@ std::shared_ptr RandomSamplerObj::SamplerCopy() { auto sampler = std::make_shared(replacement_, num_samples_, reshuffle_each_epoch_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. 
Message: " << rc; + } } return sampler; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h index df2c80c08f3..81ecedf3949 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h @@ -25,6 +25,7 @@ #include #include "include/api/status.h" +#include "minddata/dataset/util/validators.h" #ifndef ENABLE_ANDROID #include "minddata/mindrecord/include/shard_operator.h" #endif diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.cc index 3fe80140d48..50fbc5b2c01 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.cc @@ -64,7 +64,7 @@ Status SequentialSamplerObj::to_json(nlohmann::json *const out_json) { #ifndef ENABLE_ANDROID Status SequentialSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("start_index") != json_obj.end(), "Failed to find start_index"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "start_index", "SequentialSampler")); int64_t start_index = json_obj["start_index"]; *sampler = std::make_shared(start_index, num_samples); // Run common code in super class to add children samplers @@ -89,11 +89,14 @@ std::shared_ptr SequentialSamplerObj::BuildForMindDat return mind_sampler; } #endif + std::shared_ptr SequentialSamplerObj::SamplerCopy() { auto sampler = std::make_shared(start_index_, num_samples_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) 
MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. Message: " << rc; + } } return sampler; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.cc index cebe26ed615..2a7a44ead73 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.cc @@ -67,7 +67,7 @@ Status SubsetRandomSamplerObj::to_json(nlohmann::json *const out_json) { #ifndef ENABLE_ANDROID Status SubsetRandomSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Failed to find indices"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "indices", "SubsetRandomSampler")); std::vector indices = json_obj["indices"]; *sampler = std::make_shared(indices, num_samples); // Run common code in super class to add children samplers @@ -80,7 +80,9 @@ std::shared_ptr SubsetRandomSamplerObj::SamplerCopy() { auto sampler = std::make_shared(indices_, num_samples_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. 
Message: " << rc; + } } return sampler; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.cc index 420babf365b..2ea95d87762 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.cc @@ -74,7 +74,7 @@ Status SubsetSamplerObj::to_json(nlohmann::json *const out_json) { #ifndef ENABLE_ANDROID Status SubsetSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Failed to find indices"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "indices", "SubsetSampler")); std::vector indices = json_obj["indices"]; *sampler = std::make_shared(indices, num_samples); // Run common code in super class to add children samplers @@ -87,7 +87,9 @@ std::shared_ptr SubsetSamplerObj::SamplerCopy() { auto sampler = std::make_shared(indices_, num_samples_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. 
Message: " << rc; + } } return sampler; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.cc index c78dbb14e76..f8b0b6d3d0e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.cc @@ -35,8 +35,10 @@ Status WeightedRandomSamplerObj::ValidateParams() { int32_t zero_elem = 0; for (int32_t i = 0; i < weights_.size(); ++i) { if (weights_[i] < 0) { - RETURN_STATUS_UNEXPECTED("WeightedRandomSampler: weights vector must not contain negative number, got: " + - std::to_string(weights_[i])); + RETURN_STATUS_UNEXPECTED( + "WeightedRandomSampler: weights vector must not contain negative numbers, got: " "weights[" + + std::to_string(i) + "] = " + std::to_string(weights_[i])); } if (weights_[i] == 0.0) { zero_elem++; @@ -66,8 +68,8 @@ Status WeightedRandomSamplerObj::to_json(nlohmann::json *const out_json) { #ifndef ENABLE_ANDROID Status WeightedRandomSamplerObj::from_json(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("weights") != json_obj.end(), "Failed to find weights"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Failed to find replacement"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "weights", "WeightedRandomSampler")); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "replacement", "WeightedRandomSampler")); std::vector weights = json_obj["weights"]; bool replacement = json_obj["replacement"]; *sampler = std::make_shared(weights, num_samples, replacement); @@ -83,11 +85,14 @@ Status WeightedRandomSamplerObj::SamplerBuild(std::shared_ptr *sample sampler = s.IsOk() ?
sampler : nullptr; return s; } + std::shared_ptr WeightedRandomSamplerObj::SamplerCopy() { auto sampler = std::make_shared(weights_, num_samples_, replacement_); for (const auto &child : children_) { Status rc = sampler->AddChildSampler(child); - if (rc.IsError()) MS_LOG(ERROR) << "Error in copying the sampler. Message: " << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "[Internal ERROR] Error in copying the sampler. Message: " << rc; + } } return sampler; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/sbu_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/sbu_node.cc index f609b6b3c1e..0f2d3b6b0c7 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/sbu_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/sbu_node.cc @@ -43,8 +43,8 @@ void SBUNode::Print(std::ostream &out) const { Status SBUNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("SBUNode", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("SBUNode", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("SBUDataset", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("SBUDataset", sampler_)); Path root_dir(dataset_dir_); @@ -52,9 +52,9 @@ Status SBUNode::ValidateParams() { Path caption_path = root_dir / Path("SBU_captioned_photo_dataset_captions.txt"); Path image_path = root_dir / Path("sbu_images"); - RETURN_IF_NOT_OK(ValidateDatasetFilesParam("SBUNode", {url_path.ToString()})); - RETURN_IF_NOT_OK(ValidateDatasetFilesParam("SBUNode", {caption_path.ToString()})); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("SBUNode", {image_path.ToString()})); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("SBUDataset", {url_path.ToString()}, "url file")); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("SBUDataset", {caption_path.ToString()}, "caption file")); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("SBUDataset", {image_path.ToString()})); return 
Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc index 48960d3fc73..5de7ffd2e67 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc @@ -53,19 +53,11 @@ void TextFileNode::Print(std::ostream &out) const { Status TextFileNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetFilesParam("TextFileNode", dataset_files_)); - - if (shuffle_ != ShuffleMode::kFalse && shuffle_ != ShuffleMode::kFiles && shuffle_ != ShuffleMode::kGlobal) { - std::string err_msg = "TextFileNode: Invalid ShuffleMode, check input value of enum."; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - if (num_samples_ < 0) { - std::string err_msg = "TextFileNode: Invalid number of samples: " + std::to_string(num_samples_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - RETURN_IF_NOT_OK(ValidateDatasetShardParams("TextFileNode", num_shards_, shard_id_)); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("TextFileDataset", dataset_files_)); + RETURN_IF_NOT_OK(ValidateEnum("TextFileDataset", "ShuffleMode", shuffle_, + {ShuffleMode::kFalse, ShuffleMode::kFiles, ShuffleMode::kGlobal})); + RETURN_IF_NOT_OK(ValidateScalar("TextFileDataset", "num_samples", num_samples_, {0}, false)); + RETURN_IF_NOT_OK(ValidateDatasetShardParams("TextFileDataset", num_shards_, shard_id_)); return Status::OK(); } @@ -155,13 +147,12 @@ Status TextFileNode::to_json(nlohmann::json *out_json) { } Status TextFileNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find 
dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kTextFileNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_files", kTextFileNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_samples", kTextFileNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shuffle", kTextFileNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_shards", kTextFileNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shard_id", kTextFileNode)); std::vector dataset_files = json_obj["dataset_files"]; int64_t num_samples = json_obj["num_samples"]; ShuffleMode shuffle = static_cast(json_obj["shuffle"]); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc index 44927ae934c..15f749f3130 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc @@ -52,45 +52,11 @@ void TFRecordNode::Print(std::ostream &out) const { // Validator for TFRecordNode Status TFRecordNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - - if (shuffle_ != ShuffleMode::kFalse && shuffle_ != ShuffleMode::kFiles && shuffle_ != ShuffleMode::kGlobal) { - std::string err_msg = "TFRecordNode: Invalid ShuffleMode, check input value of enum."; - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - if (dataset_files_.empty()) { - std::string err_msg = "TFRecordNode: 
dataset_files is not specified."; - MS_LOG(ERROR) << err_msg; - return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); - } - - for (const auto &f : dataset_files_) { - auto realpath = FileUtils::GetRealPath(f.data()); - CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), - "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."); - } - - if (num_samples_ < 0) { - std::string err_msg = "TFRecordNode: Invalid number of samples: " + std::to_string(num_samples_); - MS_LOG(ERROR) << err_msg; - - return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); - } - - if (num_shards_ <= 0) { - std::string err_msg = "TFRecordNode: Invalid num_shards: " + std::to_string(num_shards_); - MS_LOG(ERROR) << err_msg; - - return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); - } - - if (shard_id_ < 0 || shard_id_ >= num_shards_) { - std::string err_msg = "TFRecordNode: Invalid input, shard_id: " + std::to_string(shard_id_) + - ", num_shards: " + std::to_string(num_shards_); - MS_LOG(ERROR) << err_msg; - - return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); - } + RETURN_IF_NOT_OK(ValidateEnum("TFRecordDataset", "ShuffleMode", shuffle_, + {ShuffleMode::kFalse, ShuffleMode::kFiles, ShuffleMode::kGlobal})); + RETURN_IF_NOT_OK(ValidateDatasetFilesParam("TFRecordDataset", dataset_files_)); + RETURN_IF_NOT_OK(ValidateScalar("TFRecordDataset", "num_samples", num_samples_, {0}, false)); + RETURN_IF_NOT_OK(ValidateDatasetShardParams("TFRecordDataset", num_shards_, shard_id_)); std::vector invalid_files(dataset_files_.size()); auto it = std::copy_if(dataset_files_.begin(), dataset_files_.end(), invalid_files.begin(), @@ -239,15 +205,14 @@ Status TFRecordNode::to_json(nlohmann::json *out_json) { } Status TFRecordNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns_list") != json_obj.end(), "Failed to find columns_list"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_equal_rows") != json_obj.end(), "Failed to find shard_equal_rows"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_files", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "columns_list", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_samples", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shuffle", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_shards", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shard_id", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "shard_equal_rows", kTFRecordNode)); std::vector dataset_files = json_obj["dataset_files"]; std::vector columns_list = json_obj["columns_list"]; int64_t num_samples = json_obj["num_samples"]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/usps_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/usps_node.cc index 08336a451f5..599009f6922 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/usps_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/usps_node.cc @@ -56,15 +56,10 @@ void USPSNode::Print(std::ostream 
&out) const { Status USPSNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("USPSNode", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateStringValue("USPSNode", usage_, {"train", "test", "all"})); - - if (num_samples_ < 0) { - std::string err_msg = "USPSNode: Invalid number of samples: " + std::to_string(num_samples_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } - - RETURN_IF_NOT_OK(ValidateDatasetShardParams("USPSNode", num_shards_, shard_id_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("USPSDataset", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateStringValue("USPSDataset", usage_, {"train", "test", "all"})); + RETURN_IF_NOT_OK(ValidateScalar("USPSDataset", "num_samples", num_samples_, {0}, false)); + RETURN_IF_NOT_OK(ValidateDatasetShardParams("USPSDataset", num_shards_, shard_id_)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc index 2fc0df8694f..c1e485c2513 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc @@ -57,30 +57,32 @@ Status VOCNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); Path dir(dataset_dir_); - RETURN_IF_NOT_OK(ValidateDatasetDirParam("VOCNode", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("VOCDataset", dataset_dir_)); - RETURN_IF_NOT_OK(ValidateDatasetSampler("VOCNode", sampler_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("VOCDataset", sampler_)); if (task_ == "Segmentation") { if (!class_index_.empty()) { - std::string err_msg = "VOCNode: class_indexing is invalid in Segmentation task."; + std::string err_msg = "VOCDataset: 'class_indexing' is invalid in Segmentation task."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } Path imagesets_file = dir / "ImageSets" / "Segmentation" / usage_ + 
".txt"; if (!imagesets_file.Exists()) { - std::string err_msg = "VOCNode: Invalid usage: " + usage_ + ", file does not exist"; - MS_LOG(ERROR) << "VOCNode: Invalid usage: " << usage_ << ", file \"" << imagesets_file << "\" does not exist!"; + std::string err_msg = "VOCDataset: Invalid 'usage': " + usage_ + ", file does not exist"; + MS_LOG(ERROR) << "VOCDataset: Invalid 'usage': " << usage_ << ", file \"" << imagesets_file + << "\" does not exist!"; return Status(StatusCode::kMDSyntaxError, err_msg); } } else if (task_ == "Detection") { Path imagesets_file = dir / "ImageSets" / "Main" / usage_ + ".txt"; if (!imagesets_file.Exists()) { - std::string err_msg = "VOCNode: Invalid usage: " + usage_ + ", file does not exist"; - MS_LOG(ERROR) << "VOCNode: Invalid usage: " << usage_ << ", file \"" << imagesets_file << "\" does not exist!"; + std::string err_msg = "VOCDataset: Invalid 'usage': " + usage_ + ", file does not exist"; + MS_LOG(ERROR) << "VOCDataset: Invalid 'usage': " << usage_ << ", file \"" << imagesets_file + << "\" does not exist!"; return Status(StatusCode::kMDSyntaxError, err_msg); } } else { - std::string err_msg = "VOCNode: Invalid task: " + task_; + std::string err_msg = "VOCDataset: Invalid 'task': " + task_ + ", expected Segmentation or Detection."; LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } @@ -146,7 +148,7 @@ Status VOCNode::GetDatasetSize(const std::shared_ptr &size_ge int64_t num_rows = 0, sample_size; std::vector> ops; RETURN_IF_NOT_OK(Build(&ops)); - CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Unable to build VocOp."); + CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "[Internal ERROR] Unable to build VocOp."); auto op = std::dynamic_pointer_cast(ops.front()); RETURN_IF_NOT_OK(op->CountTotalRows(&num_rows)); std::shared_ptr sampler_rt = nullptr; @@ -182,15 +184,14 @@ Status VOCNode::to_json(nlohmann::json *out_json) { #ifndef ENABLE_ANDROID Status VOCNode::from_json(nlohmann::json json_obj, std::shared_ptr *ds) { - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), - "Failed to find num_parallel_workers"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extra_metadata") != json_obj.end(), "Failed to find extra_metadata"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "num_parallel_workers", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "dataset_dir", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "task", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "usage", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "class_indexing", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "decode", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "sampler", kTFRecordNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "extra_metadata", kTFRecordNode)); std::string dataset_dir = json_obj["dataset_dir"]; std::string task = json_obj["task"]; std::string usage = json_obj["usage"]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc index c4906708cd0..8f983413672 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/take_node.cc @@ -52,7 +52,7 @@ Status 
TakeNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); if (take_count_ <= 0 && take_count_ != -1) { std::string err_msg = - "TakeNode: take_count should be either -1 or positive integer, take_count: " + std::to_string(take_count_); + "TakeNode: 'take_count' should be either -1 or positive integer, but got: " + std::to_string(take_count_); LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } return Status::OK(); @@ -93,7 +93,7 @@ Status TakeNode::to_json(nlohmann::json *out_json) { Status TakeNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "count", kTakeNode)); int32_t count = json_obj["count"]; *result = std::make_shared(ds, count); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc index d5dcec86f5d..cc56a73e547 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc @@ -57,10 +57,7 @@ void TransferNode::Print(std::ostream &out) const { // Validator for TransferNode Status TransferNode::ValidateParams() { RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); - if (total_batch_ < 0) { - std::string err_msg = "TransferNode: Total batches should be >= 0, value given: " + std::to_string(total_batch_); - LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); - } + RETURN_IF_NOT_OK(ValidateScalar("Transfer", "Total batches", total_batch_, {0}, false)); return Status::OK(); } @@ -89,7 +86,7 @@ Status TransferNode::Build(std::vector> *const node_o } else if (device_type_ == kAscendDevice) { type = DeviceQueueOp::DeviceType::Ascend; } else { - std::string err_msg = "Unknown device target."; + std::string err_msg = "Unknown device target, support CPU, GPU or 
Ascend"; MS_LOG(ERROR) << err_msg; RETURN_STATUS_UNEXPECTED(err_msg); } @@ -128,13 +125,12 @@ Status TransferNode::to_json(nlohmann::json *out_json) { Status TransferNode::from_json(nlohmann::json json_obj, std::shared_ptr ds, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("queue_name") != json_obj.end(), "Failed to find queue_name"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("device_type") != json_obj.end(), "Failed to find device_type"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("device_id") != json_obj.end(), "Failed to find device_id"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("send_epoch_end") != json_obj.end(), "Failed to find send_epoch_end"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("total_batch") != json_obj.end(), "Failed to find total_batch"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("create_data_info_queue") != json_obj.end(), - "Failed to find create_data_info_queue"); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "queue_name", kTransferNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "device_type", kTransferNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "device_id", kTransferNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "send_epoch_end", kTransferNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "total_batch", kTransferNode)); + RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "create_data_info_queue", kTransferNode)); std::string queue_name = json_obj["queue_name"]; std::string device_type = json_obj["device_type"]; int32_t device_id = json_obj["device_id"]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.cc b/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.cc index 3d91c91fbf8..17f7c9fae1a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/python_runtime_context.cc @@ -23,7 +23,7 @@ Status PythonRuntimeContext::Terminate() { if (tree_consumer_ != nullptr) 
{ return TerminateImpl(); } - MS_LOG(WARNING) << "Dataset TreeConsumer was not initialized."; + MS_LOG(INFO) << "Dataset TreeConsumer was not initialized."; return Status::OK(); } @@ -36,7 +36,9 @@ Status PythonRuntimeContext::TerminateImpl() { PythonRuntimeContext::~PythonRuntimeContext() { Status rc = PythonRuntimeContext::Terminate(); - if (rc.IsError()) MS_LOG(ERROR) << "Error while terminating the consumer. Message:" << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "Error while terminating the consumer. Message:" << rc; + } { py::gil_scoped_acquire gil_acquire; tree_consumer_.reset(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/runtime_context.cc b/mindspore/ccsrc/minddata/dataset/engine/runtime_context.cc index 94a111eb358..5d989192858 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/runtime_context.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/runtime_context.cc @@ -24,7 +24,7 @@ Status NativeRuntimeContext::Terminate() { if (tree_consumer_ != nullptr) { return TerminateImpl(); } - MS_LOG(WARNING) << "Dataset TreeConsumer was not initialized."; + MS_LOG(INFO) << "Dataset TreeConsumer was not initialized."; return Status::OK(); } @@ -35,7 +35,9 @@ Status NativeRuntimeContext::TerminateImpl() { NativeRuntimeContext::~NativeRuntimeContext() { Status rc = NativeRuntimeContext::Terminate(); - if (rc.IsError()) MS_LOG(ERROR) << "Error while terminating the consumer. Message:" << rc; + if (rc.IsError()) { + MS_LOG(ERROR) << "Error while terminating the consumer. 
Message:" << rc; + } } TreeConsumer *RuntimeContext::GetConsumer() { return tree_consumer_.get(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc index 25b59d543fd..51aac061cf1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc @@ -65,8 +65,8 @@ Status Serdes::SaveJSONToFile(nlohmann::json json_string, const std::string &fil } auto realpath = FileUtils::GetRealPath(dir.value().data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Get real path failed, path=" << file_name; - RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + file_name); + MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name; + RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name); } std::optional whole_path = ""; @@ -78,7 +78,8 @@ Status Serdes::SaveJSONToFile(nlohmann::json json_string, const std::string &fil ChangeFileMode(whole_path.value(), S_IRUSR | S_IWUSR); } catch (const std::exception &err) { - RETURN_STATUS_UNEXPECTED("Invalid data, failed to save json string into file: " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to save json string into file: " + file_name + + ", error message: " + err.what()); } return Status::OK(); } @@ -91,7 +92,8 @@ Status Serdes::Deserialize(const std::string &json_filepath, std::shared_ptr> json_obj; } catch (const std::exception &e) { - return Status(StatusCode::kMDSyntaxError, "Invalid file, failed to parse json file: " + json_filepath); + return Status(StatusCode::kMDSyntaxError, + "Invalid file, failed to parse json file: " + json_filepath + ", error message: " + e.what()); } RETURN_IF_NOT_OK(ConstructPipeline(json_obj, ds)); return Status::OK(); @@ -337,7 +339,7 @@ Status Serdes::ParseMindIRPreprocess(const std::string &dataset_json, const std: try { dataset_js = nlohmann::json::parse(dataset_json); } catch (const std::exception &err) { - 
MS_LOG(ERROR) << "Invalid json content, failed to parse JSON data."; + MS_LOG(ERROR) << "Invalid json content, failed to parse JSON data, error message: " << err.what(); RETURN_STATUS_UNEXPECTED("Invalid json content, failed to parse JSON data."); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h index 72bbaf570e3..08d5871d097 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h @@ -50,8 +50,8 @@ Status ValidateScalar(const std::string &op_name, const std::string &scalar_name } if ((left_open_interval && scalar <= range[0]) || (!left_open_interval && scalar < range[0])) { std::string interval_description = left_open_interval ? " greater than " : " greater than or equal to "; - std::string err_msg = op_name + ":" + scalar_name + " must be" + interval_description + std::to_string(range[0]) + - ", got: " + std::to_string(scalar); + std::string err_msg = op_name + ": '" + scalar_name + "' must be" + interval_description + + std::to_string(range[0]) + ", got: " + std::to_string(scalar); MS_LOG(ERROR) << err_msg; return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); } @@ -69,6 +69,15 @@ Status ValidateScalar(const std::string &op_name, const std::string &scalar_name return Status::OK(); } +// Helper function to validate enum +template +Status ValidateEnum(const std::string &op_name, const std::string &enum_name, const T enumeration, + const std::vector &enum_list) { + auto existed = std::find(enum_list.begin(), enum_list.end(), enumeration); + std::string err_msg = op_name + ": Invalid " + enum_name + ", check input value of enum."; + return existed != enum_list.end() ? 
Status::OK() : Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); +} + // Helper function to validate color attribute Status ValidateVectorColorAttribute(const std::string &op_name, const std::string &attr_name, const std::vector &attr, const std::vector &range); diff --git a/mindspore/ccsrc/minddata/dataset/util/validators.h b/mindspore/ccsrc/minddata/dataset/util/validators.h index fac8baaf912..603c42f18ba 100644 --- a/mindspore/ccsrc/minddata/dataset/util/validators.h +++ b/mindspore/ccsrc/minddata/dataset/util/validators.h @@ -25,11 +25,11 @@ namespace mindspore { namespace dataset { // validator Parameter in json file -inline Status ValidateParamInJson(nlohmann::json op_params, const std::string ¶m_name, +inline Status ValidateParamInJson(const nlohmann::json &json_obj, const std::string ¶m_name, const std::string &operator_name) { - if (op_params.find(param_name) == op_params.end()) { - std::string err_msg = "Failed to find parameter '" + param_name + "' of '" + operator_name + - "' operator in input json file or input dict, check input parameter of API 'deserialize."; + if (json_obj.find(param_name) == json_obj.end()) { + std::string err_msg = "Failed to find key '" + param_name + "' in " + operator_name + + "' JSON file or input dict, check input content of deserialize()."; RETURN_STATUS_UNEXPECTED(err_msg); } return Status::OK(); diff --git a/tests/ut/python/dataset/test_datasets_imagefolder.py b/tests/ut/python/dataset/test_datasets_imagefolder.py index 2ebccdcd168..53d522f2bbc 100644 --- a/tests/ut/python/dataset/test_datasets_imagefolder.py +++ b/tests/ut/python/dataset/test_datasets_imagefolder.py @@ -407,7 +407,7 @@ def test_weighted_random_sampler_exception(): sampler = ds.WeightedRandomSampler(weights) sampler.parse() - error_msg_4 = "WeightedRandomSampler: weights vector must not contain negative number, got: " + error_msg_4 = "WeightedRandomSampler: weights vector must not contain negative numbers, got: " with 
pytest.raises(RuntimeError, match=error_msg_4): weights = [1.0, 0.1, 0.02, 0.3, -0.4] sampler = ds.WeightedRandomSampler(weights)