diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc index 20906619812..f77d39e3e5a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc @@ -181,7 +181,8 @@ Status BarrierOp::blockCond() { py::object ret_py_obj = condition_function_(); // Process the return value if (!py::isinstance(ret_py_obj)) { - return Status(StatusCode::kPyFuncException, "Condition wait function should return true/false"); + return Status(StatusCode::kPyFuncException, + "Invalid parameter, condition wait function should return true/false."); } } catch (const py::error_already_set &e) { return Status(StatusCode::kPyFuncException, e.what()); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc index 647f4c434bb..26a077fdd33 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc @@ -51,9 +51,15 @@ Status BatchOp::Builder::Build(std::shared_ptr *ptr) { Status BatchOp::Builder::SanityCheck() { std::string err; - err += builder_op_connector_size_ <= 0 ? "connector size <= 0\n" : ""; - err += builder_batch_size_ <= 0 ? "batch size <= 0\n" : ""; - err += builder_num_workers_ <= 0 ? "batch num_parallel_workers <= 0\n" : ""; + err += builder_op_connector_size_ <= 0 ? "Invalid parameter, connector_size must be greater than 0, but got " + + std::to_string(builder_op_connector_size_) + ".\n" + : ""; + err += builder_batch_size_ <= 0 ? "Invalid parameter, batch_size must be greater than 0, but got " + + std::to_string(builder_batch_size_) + ".\n" + : ""; + err += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; return err.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err)); } @@ -184,7 +190,9 @@ Status BatchOp::BatchRows(const std::unique_ptr *src, const std::u } // Don't do anything if the tensor has no data } else { - RETURN_STATUS_UNEXPECTED("[Batch ERROR] Inconsistent TensorShapes of Column " + std::to_string(i)); + RETURN_STATUS_UNEXPECTED( + "Invalid data, expect same shape for each data row, but got inconsistent data shapes in column " + + std::to_string(i)); } } } else { // handle string column differently @@ -239,7 +247,9 @@ Status BatchOp::MakeBatchedBuffer(std::pair, CBatc } Status BatchOp::LaunchThreadsAndInitOp() { - RETURN_UNEXPECTED_IF_NULL(tree_); + if (tree_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); + } RETURN_IF_NOT_OK(worker_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&BatchOp::WorkerEntry, this, std::placeholders::_1))); return Status::OK(); @@ -258,7 +268,7 @@ Status BatchOp::MapColumns(std::pair, CBatchInfo> input_table.reserve(pyfunc_column_names_.size()); for (std::string col_name : pyfunc_column_names_) { if (column_name_id_map_.find(col_name) == column_name_id_map_.end()) { - RETURN_STATUS_UNEXPECTED("column : '" + col_name + "' does not exist\n"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: '" + col_name + "' does not exist.\n"); } TensorBatch tensor_batch; tensor_batch.reserve(table_pair->first->size()); @@ -310,12 +320,14 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { py::object size = batch_size_func_(info); *batch_size = size.cast(); if (*batch_size <= 0) { - return Status(StatusCode::kPyFuncException, "Batch size function should return an integer > 0"); + return Status(StatusCode::kPyFuncException, + "Invalid parameter, batch size function should return an integer greater than 0."); } } catch (const py::error_already_set 
&e) { return Status(StatusCode::kPyFuncException, e.what()); } catch (const py::cast_error &e) { - return Status(StatusCode::kPyFuncException, "Batch size function should return an integer > 0"); + return Status(StatusCode::kPyFuncException, + "Invalid parameter, batch size function should return an integer greater than 0."); } } return Status(StatusCode::kOK, "Batch size func call succeed"); @@ -346,7 +358,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou // Parse batch map return value py::tuple ret_tuple = py::cast(ret_py_obj); if (ret_tuple.size() != pyfunc_column_names_.size() || !py::isinstance(ret_tuple)) { - return Status(StatusCode::kPyFuncException, "Batch map function should return a tuple"); + return Status(StatusCode::kPyFuncException, "Invalid parameter, batch map function should return a tuple."); } for (size_t i = 0; i < ret_tuple.size(); i++) { TensorBatch output_batch; @@ -361,7 +373,8 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou } catch (const py::error_already_set &e) { return Status(StatusCode::kPyFuncException, e.what()); } catch (const py::cast_error &e) { - return Status(StatusCode::kPyFuncException, "Batch map function should return an tuple of list of numpy array"); + return Status(StatusCode::kPyFuncException, + "Invalid parameter, batch map function should return a tuple of list of numpy array."); } } return Status(StatusCode::kOK); @@ -371,7 +384,10 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou Status BatchOp::PadColumns(std::unique_ptr *table, const PadInfo &pad_info, const std::unordered_map &column_name_id_map) { RETURN_UNEXPECTED_IF_NULL(table); // placeholder for now, might need this in the future - CHECK_FAIL_RETURN_UNEXPECTED((*table)->front().size() == column_name_id_map.size(), "col_name_map mismatch"); + CHECK_FAIL_RETURN_UNEXPECTED( + (*table)->front().size() == column_name_id_map.size(), + "Invalid parameter, size 
of column_name_id_map must be equal to num of data columns. map size: " + + std::to_string(column_name_id_map.size()) + ", column nums: " + std::to_string((*table)->front().size())); std::vector> pad_vals(column_name_id_map.size(), 0); // value to pad each column's tensor with, default 0 std::set pad_cols; @@ -383,14 +399,19 @@ Status BatchOp::PadColumns(std::unique_ptr *table, const PadInfo & for (size_t col_id : pad_cols) { max_shapes[col_id] = std::vector((*table)->front()[col_id]->Rank(), -1); if (pad_shapes[col_id].empty()) pad_shapes[col_id] = max_shapes[col_id]; // fill pad shape with -1 - CHECK_FAIL_RETURN_UNEXPECTED(pad_shapes[col_id].size() == max_shapes[col_id].size(), "wrong rank in pad_shape"); + CHECK_FAIL_RETURN_UNEXPECTED( + pad_shapes[col_id].size() == max_shapes[col_id].size(), + "Invalid data, rank of pad_shape must be equal to rank of specified column. pad_shapes rank:" + + std::to_string(pad_shapes[col_id].size()) + ", column rank: " + std::to_string(max_shapes[col_id].size())); } // calculate maximum shape for each column that needs to be padded for (const TensorRow &row : **table) { // iterator each row in a batch for (size_t col_id : pad_cols) { // iterator each tensor in a row - CHECK_FAIL_RETURN_UNEXPECTED(row[col_id]->Rank() == max_shapes[col_id].size(), - "Tensor to be padded together need to have the same rank"); + CHECK_FAIL_RETURN_UNEXPECTED( + row[col_id]->Rank() == max_shapes[col_id].size(), + "Invalid data, data to be padded together need to have the same rank, got shape 1: " + + std::to_string(row[col_id]->Rank()) + ", shape 2: " + std::to_string(max_shapes[col_id].size())); for (size_t dim = 0; dim < row[col_id]->Rank(); dim++) { // pick the largest number in each dimension max_shapes[col_id][dim] = std::max(max_shapes[col_id][dim], row[col_id]->shape()[dim]); } @@ -426,9 +447,13 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, } else { for (const auto &p : pad_info) { auto location = column_name_id_map.find(p.first); - 
CHECK_FAIL_RETURN_UNEXPECTED(location != column_name_id_map.end(), "no column exists with name:" + p.first); + CHECK_FAIL_RETURN_UNEXPECTED(location != column_name_id_map.end(), + "Invalid parameter, column name: " + p.first + " does not exist."); auto col_id = static_cast(location->second); - CHECK_FAIL_RETURN_UNEXPECTED(col_id < pad_vals->size() && col_id < pad_shapes->size(), "col_id out of bound"); + CHECK_FAIL_RETURN_UNEXPECTED( + col_id < pad_vals->size() && col_id < pad_shapes->size(), + "Invalid parameter, column id must be less than the size of pad_val and pad_shape, but got: " + + std::to_string(col_id)); pad_cols->insert(col_id); (*pad_vals)[col_id] = p.second.second; // set pad values (*pad_shapes)[col_id] = p.second.first.AsVector(); // empty vector if shape is unknown diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc index 06bcdd09e73..c23537c6f3d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc @@ -52,15 +52,16 @@ Status BucketBatchByLengthOp::Builder::SanityCheck() { std::string error_message; if (builder_length_dependent_columns_.empty()) { - error_message += "At least 1 column must be specified for element length calculation.\n"; + error_message += "Invalid parameter, at least 1 column must be specified for element length calculation.\n"; } if (builder_bucket_boundaries_.empty()) { - error_message += "At least 1 bucket boundary must be specified.\n"; + error_message += "Invalid parameter, at least 1 bucket boundary must be specified.\n"; } if (builder_bucket_batch_sizes_.size() != builder_bucket_boundaries_.size() + 1) { - error_message += "There must be exactly one bucket batch size specified for each bucket boundary.\n"; + error_message += + "Invalid parameter, there must be exactly one bucket 
batch size specified for each bucket boundary.\n"; } CHECK_FAIL_RETURN_UNEXPECTED(error_message.empty(), error_message); @@ -168,7 +169,8 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T RETURN_IF_NOT_OK(element_length_function_->Compute(input, &output)); RETURN_IF_NOT_OK(output.at(0)->GetItemAt(out_element_length, {0})); if (*out_element_length < 0) { - RETURN_STATUS_UNEXPECTED("BucketBatchByLength: element_length_function returned negative integer"); + RETURN_STATUS_UNEXPECTED( + "Invalid parameter, element_length_function must return an integer greater than or equal to 0."); } } else { *out_element_length = element[0]->shape()[0]; @@ -187,7 +189,8 @@ Status BucketBatchByLengthOp::PadAndBatchBucket(int32_t bucket_index, int32_t ba for (size_t i = 0; i < pad_shape.size(); i++) { if (pad_shape[i] == TensorShape::kDimUnknown) { if (bucket_index + 1 >= bucket_boundaries_.size()) { - std::string error_message = "Requested to pad to bucket boundary, element falls in last bucket"; + std::string error_message = + "Invalid data, requested to pad to bucket boundary, element falls in last bucket."; return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, error_message); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc index 379adc1fc75..2674b566734 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc @@ -41,7 +41,9 @@ BuildSentencePieceVocabOp::BuildSentencePieceVocabOp(std::shared_ptrRegister(tree_->AllTasks())); RETURN_IF_NOT_OK( tree_->AllTasks()->CreateAsyncTask("sentenceTask", std::bind(&BuildSentencePieceVocabOp::SentenceThread, this))); @@ -69,12 +71,12 @@ Status BuildSentencePieceVocabOp::SentenceThread() { TaskManager::FindMe()->Post(); if (col_names_.empty() 
== true) { auto itr = column_name_id_map_.find("text"); - CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), - "'text' column doesn't exist when column name is empty"); + CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid data, 'text' column does not exist."); col_id_ = itr->second; } else { auto itr = column_name_id_map_.find(col_names_[0]); - CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), col_names_[0] + "column doesn't exist"); + CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), + "Invalid parameter, column name: " + col_names_[0] + " does not exist."); col_id_ = itr->second; } std::unique_ptr sentence_iter = std::make_unique(this); @@ -85,7 +87,8 @@ Status BuildSentencePieceVocabOp::SentenceThread() { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, s_status.message()); } else { if (vocab_ == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "sentencepiece vocab ptr must not be nullptr"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Invalid parameter, sentencepiece vocab not set."); } vocab_->set_model_proto(model_proto); } @@ -141,8 +144,10 @@ void BuildSentencePieceVocabOp::Next(std::string *sentence) { } if (new_row[col_id_]->type().IsNumeric() || new_row[col_id_]->Rank() > 1) { - ret_status_ = Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "for dataset only words on string columns or must bu scalar"); + ret_status_ = + Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Invalid data, build_sentence_piece_vocab only works on string data with rank equal to 1, got type: " + + new_row[col_id_]->type().ToString() + " and rank: " + std::to_string(new_row[col_id_]->Rank())); read_done_ = true; return; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc index 5ab4f7251be..17369e51608 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc @@ -54,7 +54,9 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) { int32_t row_cnt = 0; while (!new_row.empty()) { for (int32_t col : col_ids_) { - CHECK_FAIL_RETURN_UNEXPECTED(!new_row[col]->type().IsNumeric(), "from_dataset only works on string columns"); + CHECK_FAIL_RETURN_UNEXPECTED(!new_row[col]->type().IsNumeric(), + "Invalid data, build_vocab only works on string data, but got numeric data type: " + + new_row[col]->type().ToString()); for (auto itr = new_row[col]->begin(); itr != new_row[col]->end(); itr++) { (*wrkr_map)[std::string(*itr)] += 1; } @@ -77,7 +79,9 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) { Status BuildVocabOp::operator()() { // launch the collector thread - RETURN_UNEXPECTED_IF_NULL(tree_); + if (tree_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); + } RETURN_IF_NOT_OK(distributor_queue_->Register(tree_->AllTasks())); RETURN_IF_NOT_OK(collector_queue_->Register(tree_->AllTasks())); // launch worker threads and collector thread @@ -92,7 +96,8 @@ Status BuildVocabOp::operator()() { col_ids_.reserve(col_names_.size()); for (std::string col : col_names_) { auto itr = column_name_id_map_.find(col); - CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), col + " column doesn't exist"); + CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), + "Invalid parameter, column name: " + col + " does not exist."); col_ids_.push_back(itr->second); } } else { @@ -131,7 +136,7 @@ Status BuildVocabOp::CollectorThread() { ++num_quited_worker; } } // all frequencies are obtained - CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), "word_cnt is empty"); + CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), "Invalid data, no words in the dataset."); std::vector words; // make sure enough is reserved, this 
will become a partially sorted list eventually words.reserve(wrkr_map->size()); @@ -151,7 +156,8 @@ Status BuildVocabOp::CollectorThread() { err_msg += (word_cnt_.find(sp_tk) != word_cnt_.end() ? sp_tk + "\t" : ""); } - CHECK_FAIL_RETURN_UNEXPECTED(err_msg.empty(), "These specials words are already in the dataset: " + err_msg + "."); + CHECK_FAIL_RETURN_UNEXPECTED(err_msg.empty(), + "Invalid data, these special words are already in the dataset: " + err_msg + "."); int64_t num_words = std::min(static_cast(words.size()), top_k_); if (num_words == 0) { @@ -185,10 +191,13 @@ Status BuildVocabOp::CollectorThread() { } Status BuildVocabOp::Builder::Build(std::shared_ptr *op) { - CHECK_FAIL_RETURN_UNEXPECTED(builder_num_workers_ > 0, "builder num_workers need to be greater than 0"); - CHECK_FAIL_RETURN_UNEXPECTED(builder_top_k_ > 0, "top_k needs to be positive number"); + CHECK_FAIL_RETURN_UNEXPECTED( + builder_num_workers_ > 0, + "Invalid parameter, num_parallel_workers must be greater than 0, but got " + std::to_string(builder_num_workers_)); + CHECK_FAIL_RETURN_UNEXPECTED( + builder_top_k_ > 0, "Invalid parameter, top_k must be greater than 0, but got " + std::to_string(builder_top_k_)); CHECK_FAIL_RETURN_UNEXPECTED(builder_max_freq_ >= builder_min_freq_ && builder_min_freq_ >= 0, - "frequency range [a,b] should be 0 <= a <= b (a,b are inclusive)"); + "Invalid parameter, frequency range [a,b] must be 0 <= a <= b (a,b are inclusive)."); (*op) = std::make_shared( builder_vocab_, builder_col_names_, std::make_pair(builder_min_freq_, builder_max_freq_), builder_top_k_, builder_speical_tokens_, builder_special_first_, builder_num_workers_, builder_connector_size_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc index 0a9b7544bae..5fd882dce7b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc @@ -35,12 +35,13 @@ CacheLookupOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_ // Check if the required parameters are set by the builder. Status CacheLookupOp::Builder::SanityCheck() const { if (build_cache_client_ == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheLookupOp requires a CacheClient"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Invalid parameter, CacheLookupOp requires a CacheClient, but got nullptr."); } // Make sure the cache client has a valid session if (!build_cache_client_->session_id()) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Cache client for CacheLookupOp is missing session id"); + "Invalid parameter, cache client for CacheLookupOp requires a session id which is not equal to 0."); } return Status::OK(); } @@ -55,7 +56,7 @@ Status CacheLookupOp::Builder::Build(std::shared_ptr *ptr) { Status CacheLookupOp::operator()() { if (!sampler_) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "CacheLookupOp requires a sampler before it can be executed!"); + "Invalid parameter, CacheLookupOp requires a sampler before it can be executed, but got nullptr."); } RETURN_IF_NOT_OK(RegisterResources()); // Kick off the workers diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc index aa6c93ba6ea..2de9f30b5aa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc @@ -200,11 +200,11 @@ Status CacheMergeOp::PrepareNodePostAction() { // Run any common code from supe } Status CacheMergeOp::ComputeColMap() { - CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "Cache miss stream empty"); + CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != 
nullptr, "Invalid data, cache miss stream empty."); if (column_name_id_map().empty()) { column_name_id_map_ = child_[kCacheMissChildIdx]->column_name_id_map(); } - CHECK_FAIL_RETURN_UNEXPECTED(!column_name_id_map().empty(), "No column map detected"); + CHECK_FAIL_RETURN_UNEXPECTED(!column_name_id_map().empty(), "Invalid data, column_name_id_map is empty."); return Status::OK(); } @@ -219,12 +219,13 @@ CacheMergeOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_( // Check if the required parameters are set by the builder. Status CacheMergeOp::Builder::SanityCheck() const { if (build_cache_client_ == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheMergeOp requires a CacheClient"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Invalid parameter, CacheMergeOp requires a CacheClient, but got nullptr."); } // Make sure the cache client has a valid session if (!build_cache_client_->session_id()) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Cache client for CacheMergeOp is missing session id"); + "Invalid parameter, cache client for CacheMergeOp requires a session id which is not equal to 0."); } return Status::OK(); } @@ -287,7 +288,7 @@ Status CacheMergeOp::GetRq(row_id_type row_id, CacheMergeOp::TensorRowCacheReque RETURN_IF_NOT_OK(mem.allocate(1)); *out = mem.GetMutablePointer(); } else { - RETURN_STATUS_UNEXPECTED("Map insert fail."); + RETURN_STATUS_UNEXPECTED("Invalid data, map insert fail."); } } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc index 8971841a23d..f5aaa545d2a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc @@ -40,11 +40,13 @@ CacheOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullp // Check if the required parameters are 
set by the builder. Status CacheOp::Builder::SanityCheck() const { if (build_cache_client_ == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CacheOp requires a CacheClient"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Invalid parameter, CacheOp requires a CacheClient, but got nullptr."); } // Make sure the cache client has a valid session if (!build_cache_client_->session_id()) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Cache client for CacheOp is missing session id"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Invalid parameter, cache client for CacheOp requires a session id which is not equal to 0."); } return Status::OK(); } @@ -76,7 +78,7 @@ Status CacheOp::InitCache() { return Status::OK(); } Status CacheOp::operator()() { if (!sampler_) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "CacheOp requires a sampler before it can be executed!"); + "Invalid parameter, CacheOp requires a sampler before it can be executed, but got nullptr."); } RETURN_IF_NOT_OK(RegisterResources()); // Kick off the workers diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc index 6f12c36b489..9f2294bf16b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc @@ -162,7 +162,7 @@ Status ConcatOp::Verify(int32_t id, const std::unique_ptr &buf) { int32_t index = 0; for (auto item : new_row) { if ((item->type() != data_type_[index]) || item->Rank() != data_rank_[index++]) { - RETURN_STATUS_UNEXPECTED("The data type or data rank is not the same with previous dataset."); + RETURN_STATUS_UNEXPECTED("Invalid data, data type or data rank is not the same with previous dataset."); } } } @@ -180,7 +180,7 @@ Status ConcatOp::ComputeColMap() { // Verify all children have the same column name 
map for (int32_t i = 0; i < child_.size(); ++i) { if (child_[i]->column_name_id_map() != column_name_id_map_) { - RETURN_STATUS_UNEXPECTED("The column name or column order is not the same with previous dataset."); + RETURN_STATUS_UNEXPECTED("Invalid data, column name or column order is not the same with previous dataset."); } } } else { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc index 46e8a97bcfb..1b877779274 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc @@ -262,7 +262,8 @@ Status DatasetOp::GetNextInput(std::unique_ptr *p_buffer, int32_t wo if (child_.size() == 0) { return this->GetNextBuffer(p_buffer, worker_id); } - CHECK_FAIL_RETURN_UNEXPECTED(child_index < child_.size(), "Child index too big : " + std::to_string(child_index)); + CHECK_FAIL_RETURN_UNEXPECTED(child_index < child_.size(), + "Invalid data, child index too big : " + std::to_string(child_index)); std::shared_ptr child = child_[child_index]; std::unique_ptr buf; RETURN_IF_NOT_OK(child->GetNextBuffer(&buf, worker_id)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc index 4f54f89e3af..2f99262d095 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc @@ -68,8 +68,8 @@ Status DeviceQueueOp::CheckExceptions(const std::unique_ptr &buffer) TensorRow row; buffer->GetRow(0, &row); for (const auto &item : row) { - CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Cannot send tensor of string type to device."); - CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Cannot send tensor with no data."); + CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid data, cannot send string tensor to 
device."); + CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, cannot send tensor with no data to device."); } } return Status::OK(); @@ -206,7 +206,7 @@ Status DeviceQueueOp::SendDataToGPU() { if (!is_open) { handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, data_size, release_function); if (handle == INVALID_HANDLE) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "open failed"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Failed to open channel for sending data."); } is_open = true; } @@ -249,7 +249,7 @@ Status DeviceQueueOp::RetryPushGPUData(const std::vector &data_size, con ReleaseData(items[i].data_ptr_); } if (ret == BlockQueueStatus_T::ERROR_INPUT) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "invalid input Data, please check it."); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Invalid input data, please check it."); } else { if (!stop_send_) { MS_LOG(DEBUG) << "Retry pushing data..."; @@ -269,7 +269,7 @@ Status DeviceQueueOp::MallocForGPUData(std::vector *items, for (auto &sub_item : *items) { RETURN_IF_NOT_OK(pool_->Allocate(sub_item.data_len_, &sub_item.data_ptr_)); if (sub_item.data_ptr_ == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed."); + return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__, "Memory malloc failed."); } (void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_); const unsigned char *column_data = curr_row[i]->GetBuffer(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc index 10dd4f71d76..dd672468b11 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc @@ -37,8 +37,12 @@ namespace dataset { Status FilterOp::Builder::SanityCheck() { std::string err; - err += 
builder_op_connector_size_ <= 0 ? "connector size <= 0\n" : ""; - err += builder_num_workers_ <= 0 ? "filter num_parallel_workers <= 0\n" : ""; + err += builder_op_connector_size_ <= 0 ? "Invalid parameter, connector_size must be greater than 0, but got " + + std::to_string(builder_op_connector_size_) + ".\n" + : ""; + err += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err)); } @@ -61,7 +65,9 @@ FilterOp::FilterOp(const std::vector &in_col_names, int32_t num_wor Status FilterOp::operator()() { // The operator class just starts off threads by calling the tree_ function. - RETURN_UNEXPECTED_IF_NULL(tree_); + if (tree_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); + } filter_queues_.Init(num_workers_, oc_queue_size_); RETURN_IF_NOT_OK(filter_queues_.Register(tree_->AllTasks())); Status rc = tree_->LaunchWorkers(num_workers_, std::bind(&FilterOp::WorkerEntry, this, std::placeholders::_1)); @@ -81,7 +87,7 @@ Status FilterOp::ValidateInColumns(const std::vector *input_columns for (const auto &inCol : *input_columns) { bool found = column_name_id_map_.find(inCol) != column_name_id_map_.end() ? 
true : false; if (!found) { - std::string err_msg = "input column name: " + inCol + " doesn't exist in the dataset columns."; + std::string err_msg = "Invalid parameter, column name: " + inCol + " does not exist in the dataset columns."; RETURN_STATUS_UNEXPECTED(err_msg); } } @@ -224,7 +230,7 @@ Status FilterOp::CheckColumns(const DataBuffer *in_buf, const std::vector &columns_to_project) Status ProjectOp::Builder::SanityCheck() const { if (builder_columns_to_project_.empty()) { - std::string err_msg("Columns to project is empty."); + std::string err_msg("Invalid parameter, no column is specified for project."); RETURN_STATUS_UNEXPECTED(err_msg); } return Status::OK(); @@ -144,7 +144,7 @@ Status ProjectOp::ComputeColMap() { for (size_t i = 0; i < columns_to_project_.size(); i++) { std::string ¤t_column = columns_to_project_[i]; if (child_column_name_mapping.find(current_column) == child_column_name_mapping.end()) { - std::string err_msg = "ProjectOp: column " + current_column + " does not exist in child operator."; + std::string err_msg = "Invalid parameter, column name: " + current_column + " does not exist."; RETURN_STATUS_UNEXPECTED(err_msg); } // Setup the new column name mapping for ourself (base class field) diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc index 0eeccea50a3..deb4027436b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc @@ -126,7 +126,7 @@ Status RenameOp::ComputeColMap() { // only checks number of renamed columns have been found, this input check doesn't check everything if (found != in_columns_.size()) { MS_LOG(DEBUG) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << "."; - std::string err_msg = "Renamed column doesn't exist in dataset"; + std::string err_msg = "Invalid parameter, column to be renamed does not 
exist in dataset."; RETURN_STATUS_UNEXPECTED(err_msg); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc index 123cb4451cf..0d512b19541 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc @@ -32,7 +32,7 @@ RepeatOp::Builder::Builder(int32_t count) : build_num_repeats_(count) {} Status RepeatOp::Builder::SanityCheck() const { if (build_num_repeats_ < kInfiniteRepeat || build_num_repeats_ == 0) { - std::string err_msg("Repeat count must be > 0 or -1."); + std::string err_msg("Invalid parameter, repeat count must be greater than 0 or equal to -1."); RETURN_STATUS_UNEXPECTED(err_msg); } return Status::OK(); @@ -83,7 +83,7 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const { // this function will retry to pop the connector again and will get the non-EOE buffer if any. Status RepeatOp::GetNextBuffer(std::unique_ptr *p_buffer, int32_t worker_id, bool retry_if_eoe) { if (child_.empty()) { - RETURN_STATUS_UNEXPECTED("RepeatOp can't be the leaf node."); + RETURN_STATUS_UNEXPECTED("Pipeline init failed, RepeatOp can't be the first op in pipeline."); } std::unique_ptr buf; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc index 2b4e64cfadf..282f57b8a9a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc @@ -52,7 +52,7 @@ ShuffleOp::Builder::Builder() : build_shuffle_size_(0), build_reshuffle_each_epo Status ShuffleOp::Builder::SanityCheck() const { if (build_shuffle_size_ < 2) { - RETURN_STATUS_UNEXPECTED("Shuffle buffer size must be greater than 1."); + RETURN_STATUS_UNEXPECTED("Invalid parameter, shuffle buffer size must be greater than 1."); } return Status::OK(); } diff --git 
a/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc index d25e66ee7b2..012e23b46a1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc @@ -36,7 +36,7 @@ SkipOp::Builder::Builder(int32_t count) : build_max_skips_(count) { Status SkipOp::Builder::SanityCheck() const { if (build_max_skips_ < 0) { - std::string err_msg("Skip count must be positive integer or 0."); + std::string err_msg("Invalid parameter, skip count should be greater than or equal to 0."); RETURN_STATUS_UNEXPECTED(err_msg); } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc index 7fac0a181ab..efc6acece78 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc @@ -44,7 +44,7 @@ Status AlbumOp::Builder::Build(std::shared_ptr *ptr) { builder_schema_ = std::make_unique(); Path schema_file(builder_schema_file_); if (builder_schema_file_ == "" || !schema_file.Exists()) { - RETURN_STATUS_UNEXPECTED("Schema not provided"); + RETURN_STATUS_UNEXPECTED("Invalid file, schema_file is invalid or not set: " + builder_schema_file_); } else { MS_LOG(INFO) << "Schema file provided: " << builder_schema_file_ << "."; builder_schema_->LoadSchemaFile(builder_schema_file_, builder_columns_to_load_); @@ -58,8 +58,12 @@ Status AlbumOp::Builder::Build(std::shared_ptr *ptr) { Status AlbumOp::Builder::SanityCheck() { Path dir(builder_dir_); std::string err_msg; - err_msg += dir.IsDirectory() == false ? "Album path is invalid or not set\n" : ""; - err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is set to 0\n" : ""; + err_msg += dir.IsDirectory() == false + ? 
"Invalid parameter, Album path is invalid or not set, path: " + builder_dir_ + ".\n" + : ""; + err_msg += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -99,7 +103,7 @@ Status AlbumOp::PrescanEntry() { dirname_offset_ = folder_path_.length(); std::shared_ptr dirItr = Path::DirIterator::OpenDirectory(&folder); if (folder.Exists() == false || dirItr == nullptr) { - RETURN_STATUS_UNEXPECTED("Error unable to open: " + folder_path_); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_); } MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << "."; @@ -192,7 +196,7 @@ Status AlbumOp::WorkerEntry(int32_t worker_id) { } RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); } - RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); + RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); } // Only support JPEG/PNG/GIF/BMP @@ -203,14 +207,14 @@ Status AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { *valid = false; file_handle.open(file_name, std::ios::binary | std::ios::in); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Can not open image file " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid file, can not open image file: " + file_name); } unsigned char file_type[read_num]; (void)file_handle.read(reinterpret_cast(file_type), read_num); if (file_handle.fail()) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Read image file failed " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name); } file_handle.close(); if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { @@ -250,7 +254,7 @@ Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col if (decode_ && 
valid) { Status rc = Decode(image, &image); if (rc.IsError()) { - std::string err = "Fail to decode image:" + image_file_path; + std::string err = "Invalid data, failed to decode image: " + image_file_path; RETURN_STATUS_UNEXPECTED(err); } } @@ -302,7 +306,8 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_ MS_LOG(INFO) << "Int array found: " << data << "."; RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); } else { - RETURN_STATUS_UNEXPECTED("Error in Load Int Tensor"); + RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " + + data_schema_->column(col_num).type().ToString()); } row->push_back(std::move(label)); return Status::OK(); @@ -361,7 +366,7 @@ Status AlbumOp::LoadTensorRow(const std::string &file, TensorRow *row) { std::ifstream file_handle(folder_path_ + file); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Json file " + folder_path_ + file + " can not open."); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file); } std::string line; while (getline(file_handle, line)) { @@ -425,7 +430,7 @@ Status AlbumOp::LoadTensorRow(const std::string &file, TensorRow *row) { } } catch (const std::exception &err) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Parse Json file failed"); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file); } } file_handle.close(); @@ -476,7 +481,9 @@ Status AlbumOp::InitSampler() { } Status AlbumOp::LaunchThreadsAndInitOp() { - RETURN_UNEXPECTED_IF_NULL(tree_); + if (tree_ == nullptr) { + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); + } // registers QueueList and individual Queues for interrupt services RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc index 3466bb0fac2..f4a2f24cb44 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc @@ -54,7 +54,7 @@ Status CelebAOp::Builder::Build(std::shared_ptr *op) { builder_op_connector_size_, builder_decode_, builder_usage_, builder_extensions_, std::move(builder_schema_), std::move(builder_sampler_)); if (*op == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CelebAOp is null"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "CelebAOp init failed."); } return Status::OK(); @@ -63,8 +63,12 @@ Status CelebAOp::Builder::Build(std::shared_ptr *op) { Status CelebAOp::Builder::SanityCheck() { Path dir(builder_dir_); std::string err_msg; - err_msg += dir.IsDirectory() ? "" : "CelebA path is invalid or not set\n"; - err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is smaller than 1\n" : ""; + err_msg += dir.IsDirectory() == false + ? "Invalid parameter, CelebA path is invalid or not set, path: " + builder_dir_ + ".\n" + : ""; + err_msg += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; return err_msg.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -85,7 +89,7 @@ CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::stri Status CelebAOp::LaunchThreadsAndInitOp() { if (tree_ == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "tree_ not set"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); @@ -106,7 +110,9 @@ Status CelebAOp::ParseAttrFile() { Path folder_path(folder_path_); std::ifstream attr_file((folder_path / "list_attr_celeba.txt").toString()); if (!attr_file.is_open()) { - return Status(StatusCode::kFileNotExist, __LINE__, __FILE__, "Celeba attr file does not exist"); + std::string attr_file_name = (folder_path / "list_attr_celeba.txt").toString(); + return Status(StatusCode::kFileNotExist, __LINE__, __FILE__, + "Invalid file, failed to open Celeba attr file: " + attr_file_name); } const auto PushBackToQueue = [this](std::vector &vec, std::ifstream &attr_file, @@ -125,9 +131,11 @@ Status CelebAOp::ParseAttrFile() { try { num_rows_in_attr_file_ = static_cast(std::stoul(rows_num)); // First line is rows number in attr file } catch (std::invalid_argument &e) { - RETURN_STATUS_UNEXPECTED("Conversion to ulong failed, invalid argument."); + RETURN_STATUS_UNEXPECTED( + "Invalid data, failed to convert rows_num from attr_file to unsigned long, invalid argument: " + rows_num); } catch (std::out_of_range &e) { - RETURN_STATUS_UNEXPECTED("Conversion to ulong failed, out of range."); + RETURN_STATUS_UNEXPECTED( + "Invalid data, failed to convert rows_num from attr_file to unsigned long, out of range: " + rows_num); } (void)getline(attr_file, attr_name); // Second line is attribute name,ignore it @@ -172,10 +180,10 @@ bool CelebAOp::CheckDatasetTypeValid() { try { type = std::stoi(vec[1]); } catch (std::invalid_argument &e) { - MS_LOG(WARNING) << 
"Conversion to unsigned long failed, invalid argument, " << vec[0] << "."; + MS_LOG(WARNING) << "Invalid data, failed to convert to unsigned long, invalid argument: " << vec[1] << "."; return false; } catch (std::out_of_range &e) { - MS_LOG(WARNING) << "Conversion to unsigned long failed, out of range, " << vec[0] << "."; + MS_LOG(WARNING) << "Invalid data, failed to convert to unsigned long, out of range: " << vec[1] << "."; return false; } // train:0, valid=1, test=2 @@ -213,9 +221,9 @@ Status CelebAOp::ParseImageAttrInfo() { try { value = std::stoi(split[label_index]); } catch (std::invalid_argument &e) { - RETURN_STATUS_UNEXPECTED("Conversion to int failed, invalid argument."); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to convert to int, invalid argument: " + split[label_index]); } catch (std::out_of_range &e) { - RETURN_STATUS_UNEXPECTED("Conversion to int failed, out of range."); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to convert to int, out of range: " + split[label_index]); } image_labels.second.push_back(value); } @@ -229,8 +237,8 @@ Status CelebAOp::ParseImageAttrInfo() { num_rows_ = image_labels_vec_.size(); if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API " - "validation first."); + "Invalid data, no valid data matching the dataset API CelebADataset. 
" + "Please check file path or dataset API validation first"); } MS_LOG(DEBUG) << "Celeba dataset rows number is " << num_rows_ << "."; return Status::OK(); @@ -338,7 +346,7 @@ Status CelebAOp::WorkerEntry(int32_t worker_id) { } RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); } - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Unexpected nullptr received in worker"); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Unexpected nullptr received in worker."); } Status CelebAOp::LoadBuffer(const std::vector &keys, std::unique_ptr *db) { @@ -365,7 +373,7 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair valid = {"test", "train", "all", ""}; Path dir(dir_); std::string err_msg; - err_msg += dir.IsDirectory() == false ? "Cifar path is invalid or not set\n" : ""; - err_msg += num_workers_ <= 0 ? "Num of parallel workers is negative or 0\n" : ""; - err_msg += valid.find(usage_) == valid.end() ? "usage needs to be 'train','test' or 'all'\n" : ""; + err_msg += + dir.IsDirectory() == false ? "Invalid parameter, Cifar path is invalid or not set, path: " + dir_ + ".\n" : ""; + err_msg += num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(num_workers_) + ".\n" + : ""; + err_msg += valid.find(usage_) == valid.end() + ? "Invalid parameter, usage must be 'train','test' or 'all', but got " + usage_ + ".\n" + : ""; return err_msg.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -148,7 +153,7 @@ Status CifarOp::operator()() { Status CifarOp::LaunchThreadsAndInitOp() { if (tree_ == nullptr) { - RETURN_STATUS_UNEXPECTED("tree_ not set"); + RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); @@ -188,7 +193,7 @@ Status CifarOp::WorkerEntry(int32_t worker_id) { } RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); } - RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); + RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); } // Load 1 TensorRow (image,label). 1 function call produces 1 TensorTow in a DataBuffer @@ -272,7 +277,8 @@ Status CifarOp::ReadCifar10BlockData() { for (auto &file : cifar_files_) { // check the validity of the file path Path file_path(file); - CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), "invalid file:" + file); + CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), + "Invalid file, failed to find cifar10 file: " + file); std::string file_name = file_path.Basename(); if (usage_ == "train") { @@ -284,11 +290,11 @@ Status CifarOp::ReadCifar10BlockData() { } std::ifstream in(file, std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), file + " can not be opened."); + CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file); for (uint32_t index = 0; index < num_cifar10_records / kCifarBlockImageNum; ++index) { (void)in.read(reinterpret_cast(&(image_data[0])), block_size * sizeof(unsigned char)); - CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Fail to read cifar file" + file); + CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar10 file: " + file); (void)cifar_raw_data_block_->EmplaceBack(image_data); } in.close(); @@ -307,7 
+313,8 @@ Status CifarOp::ReadCifar100BlockData() { for (auto &file : cifar_files_) { // check the validity of the file path Path file_path(file); - CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), "invalid file:" + file); + CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), + "Invalid file, failed to find cifar100 file: " + file); std::string file_name = file_path.Basename(); // if usage is train/test, get only these 2 files @@ -319,15 +326,15 @@ Status CifarOp::ReadCifar100BlockData() { } else if (file_name.find("train") != std::string::npos) { num_cifar100_records = 50000; } else { - RETURN_STATUS_UNEXPECTED("Cifar 100 file not found!"); + RETURN_STATUS_UNEXPECTED("Invalid file, Cifar100 train/test file not found in: " + file_name); } std::ifstream in(file, std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), file + " can not be opened."); + CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file); for (uint32_t index = 0; index < num_cifar100_records / kCifarBlockImageNum; index++) { (void)in.read(reinterpret_cast(&(image_data[0])), block_size * sizeof(unsigned char)); - CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Fail to read cifar file" + file); + CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar100 file: " + file); (void)cifar_raw_data_block_->EmplaceBack(image_data); } in.close(); @@ -348,9 +355,9 @@ Status CifarOp::GetCifarFiles() { } } } else { - RETURN_STATUS_UNEXPECTED("Unable to open directory " + dir_path.toString()); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open directory: " + dir_path.toString()); } - CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(), "No .bin files found under " + folder_path_); + CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(), "Invalid file, no .bin files found under " + folder_path_); std::sort(cifar_files_.begin(), cifar_files_.end()); return Status::OK(); } @@ -390,8 +397,8 
@@ Status CifarOp::ParseCifarData() { num_rows_ = cifar_image_label_pairs_.size(); if (num_rows_ == 0) { std::string api = cifar_type_ == kCifar10 ? "Cifar10Dataset" : "Cifar100Dataset"; - RETURN_STATUS_UNEXPECTED("There is no valid data matching the dataset API " + api + - ".Please check file path or dataset API validation first."); + RETURN_STATUS_UNEXPECTED("Invalid data, no valid data matching the dataset API " + api + + ". Please check file path or dataset API validation first."); } cifar_raw_data_block_->Reset(); return Status::OK(); @@ -400,7 +407,9 @@ Status CifarOp::ParseCifarData() { // Derived from RandomAccessOp Status CifarOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty()) { - RETURN_STATUS_UNEXPECTED("ImageLabelPair not set"); + RETURN_STATUS_UNEXPECTED( + "Map for storing image-index pair is nullptr or has been set in other place, " + "it must be empty before using GetClassIds."); } for (uint64_t index = 0; index < cifar_image_label_pairs_.size(); ++index) { @@ -424,7 +433,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, constexpr int64_t num_cifar10_records = 10000; for (auto &file : op->cifar_files_) { Path file_path(file); - CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), "invalid file:" + file); + CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), + "Invalid file, failed to open cifar file: " + file); std::string file_name = file_path.Basename(); if (op->usage_ == "train") { @@ -447,7 +457,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, Path file_path(file); std::string file_name = file_path.Basename(); - CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), "invalid file:" + file); + CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), + "Invalid file, failed to find cifar file: " + file); if (op->usage_ == "train" && 
file_path.Basename().find("train") == std::string::npos) continue; if (op->usage_ == "test" && file_path.Basename().find("test") == std::string::npos) continue; @@ -458,7 +469,7 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, num_cifar100_records += 50000; } std::ifstream in(file, std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), file + " can not be opened."); + CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open file: " + file); } *count = num_cifar100_records; return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc index 3ba902ceae7..5fd861ebc8f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc @@ -41,8 +41,13 @@ ClueOp::Builder::Builder() Status ClueOp::Builder::ValidateInputs() const { std::string err; - err += builder_num_workers_ <= 0 ? "Number of parallel workers should be greater than 0\n" : ""; - err += (builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1) ? "Wrong sharding configs\n" : ""; + err += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; + err += (builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1) + ? "Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " + + std::to_string(builder_num_devices_) + ", shard_id: " + std::to_string(builder_device_id_) + ".\n" + : ""; return err.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err); } @@ -128,7 +133,7 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector key_c if (cursor.find(key_chain[i]) != cursor.end()) { cursor = cursor[key_chain[i]]; } else { - RETURN_STATUS_UNEXPECTED("Failed to find key: " + key_chain[i]); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to find key: " + key_chain[i]); } } std::string final_str = key_chain.back(); @@ -158,7 +163,7 @@ Status ClueOp::LoadFile(const std::string &file, const int64_t start_offset, con const int32_t worker_id) { std::ifstream handle(file); if (!handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Failed to open file " + file); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + file); } int64_t rows_each_buffer = 0; @@ -186,7 +191,7 @@ Status ClueOp::LoadFile(const std::string &file, const int64_t start_offset, con js = nlohmann::json::parse(line); } catch (const std::exception &err) { // Catch any exception and convert to Status return code - RETURN_STATUS_UNEXPECTED("Failed to load json file"); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + line); } int cols_count = cols_to_keyword_.size(); TensorRow tRow(cols_count, nullptr); @@ -474,7 +479,7 @@ Status ClueOp::CalculateNumRowsPerShard() { } if (all_num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API CLUEDataset. Please check file path or dataset API " + "Invalid data, no valid data matching the dataset API CLUEDataset. 
Please check file path or dataset API " "validation first."); } @@ -486,7 +491,7 @@ Status ClueOp::CalculateNumRowsPerShard() { int64_t ClueOp::CountTotalRows(const std::string &file) { std::ifstream handle(file); if (!handle.is_open()) { - MS_LOG(ERROR) << "Failed to open file: " << file; + MS_LOG(ERROR) << "Invalid file, failed to open file: " << file; return 0; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc index cef19f9be33..2c48a227f9c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc @@ -97,7 +97,7 @@ Status CocoOp::Builder::Build(std::shared_ptr *ptr) { ColDescriptor(std::string(kJsonAnnoArea), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); break; default: - RETURN_STATUS_UNEXPECTED("Invalid task type"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic."); } *ptr = std::make_shared(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_, @@ -109,9 +109,15 @@ Status CocoOp::Builder::SanityCheck() { Path dir(builder_dir_); Path file(builder_file_); std::string err_msg; - err_msg += dir.IsDirectory() == false ? "Coco image folder path is invalid or not set\n" : ""; - err_msg += file.Exists() == false ? "Coco annotation json path is invalid or not set\n" : ""; - err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is set to 0 or negative\n" : ""; + err_msg += dir.IsDirectory() == false + ? "Invalid parameter, Coco image folder path is invalid or not set, path: " + builder_dir_ + ".\n" + : ""; + err_msg += file.Exists() == false + ? "Invalid parameter, Coco annotation json path is invalid or not set, path: " + builder_file_ + ".\n" + : ""; + err_msg += builder_num_workers_ <= 0 ? 
"Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -156,7 +162,8 @@ Status CocoOp::operator()() { std::shared_ptr sample_ids; RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0)); if (sample_ids->type() != DataType(DataType::DE_INT64)) { - RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, data type of Sampler Tensor isn't int64, got " + + sample_ids->type().ToString()); } RETURN_IF_NOT_OK(TraverseSampleIds(sample_ids, &keys)); RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); @@ -210,7 +217,10 @@ Status CocoOp::Reset() { Status CocoOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, TensorRow *trow) { std::shared_ptr image, coordinate; auto itr = coordinate_map_.find(image_id); - if (itr == coordinate_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); + if (itr == coordinate_map_.end()) { + RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + + " in annotation node is not found in image node in json file."); + } std::string kImageFile = image_folder_path_ + std::string("/") + image_id; RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); @@ -245,7 +255,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Te } else if (task_type_ == TaskType::Panoptic) { RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow)); } else { - RETURN_STATUS_UNEXPECTED("Invalid task type."); + RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff or Panoptic."); } return Status::OK(); @@ -264,7 +274,10 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima std::vector category_id_row; std::vector iscrowd_row; auto itr_item = 
simple_item_map_.find(image_id); - if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); + if (itr_item == simple_item_map_.end()) { + RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + + " in annotation node is not found in image node in json file."); + } std::vector annotation = itr_item->second; for (int64_t i = 0; i < annotation.size(); i++) { @@ -293,7 +306,10 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_ std::shared_ptr item; std::vector item_queue; auto itr_item = simple_item_map_.find(image_id); - if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); + if (itr_item == simple_item_map_.end()) { + RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + + " in annotation node is not found in image node in json file."); + } item_queue = itr_item->second; std::vector bbox_dim = {static_cast(item_queue.size()), 1}; @@ -316,7 +332,10 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::vector iscrowd_row; std::vector area_row; auto itr_item = simple_item_map_.find(image_id); - if (itr_item == simple_item_map_.end()) RETURN_STATUS_UNEXPECTED("Invalid image_id found :" + image_id); + if (itr_item == simple_item_map_.end()) { + RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + + " in annotation node is not found in image node in json file."); + } std::vector annotation = itr_item->second; for (int64_t i = 0; i < annotation.size(); i++) { @@ -380,7 +399,7 @@ Status CocoOp::WorkerEntry(int32_t worker_id) { template Status CocoOp::SearchNodeInJson(const nlohmann::json &input_tree, std::string node_name, T *output_node) { auto node = input_tree.find(node_name); - CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid node found in json : " + node_name); + CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid data, invalid node found in json: 
" + node_name); (*output_node) = *node; return Status::OK(); } @@ -406,8 +425,10 @@ Status CocoOp::ParseAnnotationIds() { std::string file_name; RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonAnnoImageId), &image_id)); auto itr_file = image_index_.find(image_id); - if (itr_file == image_index_.end()) - RETURN_STATUS_UNEXPECTED("Invalid image id of annotations : " + std::to_string(image_id)); + if (itr_file == image_index_.end()) { + RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + std::to_string(image_id) + + " in annotation node is not found in image node in json file."); + } file_name = itr_file->second; switch (task_type_) { case TaskType::Detection: @@ -426,7 +447,7 @@ Status CocoOp::ParseAnnotationIds() { RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id)); break; default: - RETURN_STATUS_UNEXPECTED("Invalid task type"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic."); } } for (auto img : image_que) { @@ -438,7 +459,7 @@ Status CocoOp::ParseAnnotationIds() { Status CocoOp::ImageColumnLoad(const nlohmann::json &image_tree, std::vector *image_vec) { if (image_tree.size() == 0) { - RETURN_STATUS_UNEXPECTED("No images found in " + annotation_path_); + RETURN_STATUS_UNEXPECTED("Invalid data, no \"image\" node found in json file: " + annotation_path_); } for (auto img : image_tree) { std::string file_name; @@ -461,7 +482,8 @@ Status CocoOp::DetectionColumnLoad(const nlohmann::json &annotation_tree, const RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCategoryId), &category_id)); auto search_category = category_set_.find(category_id); if (search_category == category_set_.end()) - RETURN_STATUS_UNEXPECTED("category_id can't find in categories where category_id: " + std::to_string(category_id)); + RETURN_STATUS_UNEXPECTED("Invalid data, category_id can't find in categories where category_id: " + + std::to_string(category_id)); auto node_iscrowd = 
annotation_tree.find(kJsonAnnoIscrowd); if (node_iscrowd != annotation_tree.end()) iscrowd = *node_iscrowd; bbox.insert(bbox.end(), node_bbox.begin(), node_bbox.end()); @@ -498,11 +520,12 @@ Status CocoOp::KeypointColumnLoad(const nlohmann::json &annotation_tree, const s const int32_t &unique_id) { auto itr_num_keypoint = annotation_tree.find(kJsonAnnoNumKeypoints); if (itr_num_keypoint == annotation_tree.end()) - RETURN_STATUS_UNEXPECTED("No num_keypoint found in annotations where id: " + std::to_string(unique_id)); + RETURN_STATUS_UNEXPECTED("Invalid data, no num_keypoint found in annotations where id: " + + std::to_string(unique_id)); simple_item_map_[image_file].push_back(*itr_num_keypoint); auto itr_keypoint = annotation_tree.find(kJsonAnnoKeypoints); if (itr_keypoint == annotation_tree.end()) - RETURN_STATUS_UNEXPECTED("No keypoint found in annotations where id: " + std::to_string(unique_id)); + RETURN_STATUS_UNEXPECTED("Invalid data, no keypoint found in annotations where id: " + std::to_string(unique_id)); coordinate_map_[image_file].push_back(*itr_keypoint); return Status::OK(); } @@ -511,27 +534,31 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s const int32_t &image_id) { auto itr_segments = annotation_tree.find(kJsonAnnoSegmentsInfo); if (itr_segments == annotation_tree.end()) - RETURN_STATUS_UNEXPECTED("No segments_info found in annotations where image_id: " + std::to_string(image_id)); + RETURN_STATUS_UNEXPECTED("Invalid data, no segments_info found in annotations where image_id: " + + std::to_string(image_id)); for (auto info : *itr_segments) { std::vector bbox; uint32_t category_id = 0; auto itr_bbox = info.find(kJsonAnnoBbox); if (itr_bbox == info.end()) - RETURN_STATUS_UNEXPECTED("No bbox found in segments_info where image_id: " + std::to_string(image_id)); + RETURN_STATUS_UNEXPECTED("Invalid data, no bbox found in segments_info where image_id: " + + std::to_string(image_id)); bbox.insert(bbox.end(), 
itr_bbox->begin(), itr_bbox->end()); coordinate_map_[image_file].push_back(bbox); RETURN_IF_NOT_OK(SearchNodeInJson(info, std::string(kJsonAnnoCategoryId), &category_id)); auto search_category = category_set_.find(category_id); if (search_category == category_set_.end()) - RETURN_STATUS_UNEXPECTED("category_id can't find in categories where category_id: " + + RETURN_STATUS_UNEXPECTED("Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id)); auto itr_iscrowd = info.find(kJsonAnnoIscrowd); if (itr_iscrowd == info.end()) - RETURN_STATUS_UNEXPECTED("No iscrowd found in segments_info where image_id: " + std::to_string(image_id)); + RETURN_STATUS_UNEXPECTED("Invalid data, no iscrowd found in segments_info where image_id: " + + std::to_string(image_id)); auto itr_area = info.find(kJsonAnnoArea); if (itr_area == info.end()) - RETURN_STATUS_UNEXPECTED("No area found in segments_info where image_id: " + std::to_string(image_id)); + RETURN_STATUS_UNEXPECTED("Invalid data, no area found in segments_info where image_id: " + + std::to_string(image_id)); simple_item_map_[image_file].push_back(category_id); simple_item_map_[image_file].push_back(*itr_iscrowd); simple_item_map_[image_file].push_back(*itr_area); @@ -540,26 +567,31 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s } Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) { - if (categories_tree.size() == 0) RETURN_STATUS_UNEXPECTED("No categories found in " + annotation_path_); + if (categories_tree.size() == 0) { + RETURN_STATUS_UNEXPECTED("Invalid file, no categories found in annotation_path: " + annotation_path_); + } for (auto category : categories_tree) { int32_t id = 0; std::string name; std::vector label_info; auto itr_id = category.find(kJsonId); - if (itr_id == category.end()) RETURN_STATUS_UNEXPECTED("No id found in categories of " + annotation_path_); + if (itr_id == category.end()) { + 
RETURN_STATUS_UNEXPECTED("Invalid data, no json id found in categories of " + annotation_path_); + } id = *itr_id; label_info.push_back(id); category_set_.insert(id); auto itr_name = category.find(kJsonCategoriesName); - CHECK_FAIL_RETURN_UNEXPECTED(itr_name != category.end(), - "No name found in categories where id: " + std::to_string(id)); + CHECK_FAIL_RETURN_UNEXPECTED( + itr_name != category.end(), + "Invalid data, no categories name found in categories where id: " + std::to_string(id)); name = *itr_name; if (task_type_ == TaskType::Panoptic) { auto itr_isthing = category.find(kJsonCategoriesIsthing); CHECK_FAIL_RETURN_UNEXPECTED(itr_isthing != category.end(), - "No isthing found in categories of " + annotation_path_); + "Invalid data, no isthing found in categories of " + annotation_path_); label_info.push_back(*itr_isthing); } label_index_.emplace_back(std::make_pair(name, label_info)); @@ -574,7 +606,7 @@ Status CocoOp::InitSampler() { Status CocoOp::LaunchThreadsAndInitOp() { if (tree_ == nullptr) { - RETURN_STATUS_UNEXPECTED("tree_ not set"); + RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); @@ -590,7 +622,7 @@ Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &c if (decode_ == true) { Status rc = Decode(*tensor, tensor); - CHECK_FAIL_RETURN_UNEXPECTED(rc.IsOk(), "fail to decode file: " + path); + CHECK_FAIL_RETURN_UNEXPECTED(rc.IsOk(), "Invalid data, failed to decode image: " + path); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc index 14f4042f4d9..67dd7cacf54 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc @@ -37,8 +37,13 @@ CsvOp::Builder::Builder() 
Status CsvOp::Builder::ValidateInputs() const { std::string err; - err += builder_num_workers_ <= 0 ? "Number of parallel workers should be greater than 0\n" : ""; - err += (builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1) ? "Wrong sharding configs\n" : ""; + err += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; + err += (builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1) + ? "Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " + + std::to_string(builder_num_devices_) + ", shard_id: " + std::to_string(builder_device_id_) + ".\n" + : ""; return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err); } @@ -501,16 +506,17 @@ Status CsvOp::LoadFile(const std::string &file, const int64_t start_offset, cons // int to receive its return value. int chr = ifs.get(); if (csv_parser.ProcessMessage(chr) != 0) { - RETURN_STATUS_UNEXPECTED("Failed to parse file " + file + ":" + std::to_string(csv_parser.GetTotalRows() + 1) + - ". error message: " + csv_parser.GetErrorMessage()); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse file: " + file + ":" + + std::to_string(csv_parser.GetTotalRows() + 1) + + ". 
Error message: " + csv_parser.GetErrorMessage()); } } } catch (std::invalid_argument &ia) { std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1); - RETURN_STATUS_UNEXPECTED(file + ":" + err_row + ", type does not match"); + RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ":" + err_row + ", type does not match."); } catch (std::out_of_range &oor) { std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1); - RETURN_STATUS_UNEXPECTED(file + ":" + err_row + ", out of range"); + RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ":" + err_row + ", out of range."); } return Status::OK(); } @@ -771,7 +777,7 @@ Status CsvOp::CalculateNumRowsPerShard() { } if (all_num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API CsvDataset. Please check file path or CSV format " + "Invalid data, no valid data matching the dataset API CsvDataset. Please check file path or CSV format " "validation first."); } @@ -849,7 +855,7 @@ Status CsvOp::ComputeColMap() { if (column_name_id_map_.find(col_names[i]) == column_name_id_map_.end()) { column_name_id_map_[col_names[i]] = i; } else { - RETURN_STATUS_UNEXPECTED("Duplicate column names are not allowed"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " + col_names[i]); } } } else { @@ -857,7 +863,8 @@ Status CsvOp::ComputeColMap() { if (column_name_id_map_.find(column_name_list_[i]) == column_name_id_map_.end()) { column_name_id_map_[column_name_list_[i]] = i; } else { - RETURN_STATUS_UNEXPECTED("Duplicate column names are not allowed"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " + + column_name_list_[i]); } } } @@ -870,7 +877,10 @@ Status CsvOp::ComputeColMap() { } } if (column_default_list_.size() != column_name_id_map_.size()) { - RETURN_STATUS_UNEXPECTED("The number of column names does not match the column defaults"); + RETURN_STATUS_UNEXPECTED( + "Invalid parameter, the number of 
column names does not match the column defaults, column_default_list: " + + std::to_string(column_default_list_.size()) + + ", column_name_id_map: " + std::to_string(column_name_id_map_.size())); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc index 4bd3fd1c551..0dee92730f3 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc @@ -112,26 +112,29 @@ Status GeneratorOp::Init() { Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row) { if (!py::isinstance(py_data)) { - return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, "Generator should return a tuple of numpy arrays."); + return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, + "Invalid parameter, Generator should return a tuple of numpy arrays."); } py::tuple py_row = py_data.cast(); // Check if returned number of columns matches with column names if (py_row.size() != column_names_.size()) { - return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, - "Generator should return same number of numpy arrays as specified in column names."); + return Status( + StatusCode::kPyFuncException, __LINE__, __FILE__, + "Invalid parameter, Generator should return same number of numpy arrays as specified in column names."); } // Iterate over two containers simultaneously for memory copy for (int i = 0; i < py_row.size(); ++i) { py::object ret_py_ele = py_row[i]; if (!py::isinstance(ret_py_ele)) { return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, - "Generator should return a tuple of numpy arrays."); + "Invalid parameter, Generator should return a tuple of numpy arrays."); } std::shared_ptr tensor; RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast(), &tensor)); if ((!column_types_.empty()) && 
(column_types_[i] != DataType::DE_UNKNOWN) && (column_types_[i] != tensor->type())) { - return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, "Generator type check failed."); + return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, + "Invalid parameter, input column type is not same with output tensor type."); } tensor_row->push_back(tensor); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc index 16168cbce9f..32fa5eceebe 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc @@ -56,8 +56,12 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr *ptr) { Status ImageFolderOp::Builder::SanityCheck() { Path dir(builder_dir_); std::string err_msg; - err_msg += dir.IsDirectory() == false ? "ImageFolder path is invalid or not set\n" : ""; - err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is set to 0\n" : ""; + err_msg += dir.IsDirectory() == false + ? "Invalid parameter, ImageFolder path is invalid or not set, path: " + builder_dir_ + ".\n" + : ""; + err_msg += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -113,7 +117,7 @@ Status ImageFolderOp::PrescanMasterEntry(const std::string &filedir) { num_rows_ = image_label_pairs_.size(); if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API ImageFolderDataset. Please check file path or dataset " + "Invalid data, no valid data matching the dataset API ImageFolderDataset. 
Please check file path or dataset " "API validation first."); } // free memory of two queues used for pre-scan @@ -207,7 +211,7 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr, if (decode_ == true) { Status rc = Decode(image, &image); if (rc.IsError()) { - std::string err = "Fail to decode image:" + folder_path_ + (pairPtr->first); + std::string err = "Invalid data, failed to decode image: " + folder_path_ + (pairPtr->first); RETURN_STATUS_UNEXPECTED(err); } } @@ -258,7 +262,13 @@ Status ImageFolderOp::InitSampler() { // Derived from RandomAccessOp Status ImageFolderOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { - RETURN_STATUS_UNEXPECTED("ImageLabelPair not set"); + if (image_label_pairs_.empty()) { + RETURN_STATUS_UNEXPECTED("No images found in dataset, please check if Op read images successfully or not."); + } else { + RETURN_STATUS_UNEXPECTED( + "Map for storing image-index pair is nullptr or has been set in other place, " + "it must be empty before using GetClassIds."); + } } for (size_t i = 0; i < image_label_pairs_.size(); ++i) { (*cls_ids)[image_label_pairs_[i]->second].push_back(i); @@ -286,7 +296,7 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { Path folder(folder_path_ + folder_name); std::shared_ptr dirItr = Path::DirIterator::OpenDirectory(&folder); if (folder.Exists() == false || dirItr == nullptr) { - RETURN_STATUS_UNEXPECTED("Error unable to open: " + folder_name); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_name); } std::set imgs; // use this for ordering while (dirItr->hasNext()) { @@ -335,7 +345,7 @@ Status ImageFolderOp::startAsyncWalk() { TaskManager::FindMe()->Post(); Path dir(folder_path_); if (dir.Exists() == false || dir.IsDirectory() == false) { - RETURN_STATUS_UNEXPECTED("Error unable to open: " + folder_path_); + RETURN_STATUS_UNEXPECTED("Invalid parameter, failed to open image 
folder: " + folder_path_); } dirname_offset_ = folder_path_.length(); RETURN_IF_NOT_OK(RecursiveWalkFolder(&dir)); @@ -348,7 +358,9 @@ Status ImageFolderOp::startAsyncWalk() { } Status ImageFolderOp::LaunchThreadsAndInitOp() { - RETURN_UNEXPECTED_IF_NULL(tree_); + if (tree_ == nullptr) { + RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); + } // Registers QueueList and individual Queues for interrupt services RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(folder_name_queue_->Register(tree_->AllTasks())); @@ -375,9 +387,15 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se Path dir(path); std::string err_msg = ""; int64_t row_cnt = 0; - err_msg += (dir.Exists() == false || dir.IsDirectory() == false) ? "unable to open dir " + path : ""; - err_msg += (num_classes == nullptr || num_rows == nullptr) ? "num_class/num_rows is null\n" : ""; - err_msg += (dev_id >= num_dev || num_dev <= 0) ? "invalid sharding config\n" : ""; + err_msg += (dir.Exists() == false || dir.IsDirectory() == false) + ? "Invalid parameter, image folder path is invalid or not set, path: " + path + : ""; + err_msg += + (num_classes == nullptr || num_rows == nullptr) ? "Invalid parameter, num_class or num_rows cannot be null.\n" : ""; + err_msg += (dev_id >= num_dev || num_dev <= 0) + ? 
"Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " + + std::to_string(num_dev) + ", shard_id: " + std::to_string(dev_id) + ".\n" + : ""; if (err_msg.empty() == false) { RETURN_STATUS_UNEXPECTED(err_msg); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc index cd3fa6426ce..0f36186e7b5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc @@ -57,8 +57,10 @@ Status ManifestOp::Builder::Build(std::shared_ptr *ptr) { Status ManifestOp::Builder::SanityCheck() { std::string err_msg; - err_msg += builder_file_.empty() ? "Manifest file is not set\n" : ""; - err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers smaller than 1\n" : ""; + err_msg += builder_file_.empty() ? "Invalid parameter, Manifest file is not set.\n" : ""; + err_msg += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; return err_msg.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -135,7 +137,7 @@ Status ManifestOp::AddIoBlock(std::unique_ptr *sampler_buffer) { Status ManifestOp::LaunchThreadsAndInitOp() { if (tree_ == nullptr) { - RETURN_STATUS_UNEXPECTED("tree_ not set"); + RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); @@ -175,7 +177,7 @@ Status ManifestOp::WorkerEntry(int32_t worker_id) { } RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&io_block)); } - RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); + RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); } // Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorTow in a DataBuffer @@ -197,7 +199,7 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, const std::pair> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) { - RETURN_STATUS_UNEXPECTED("Class indexing is invalid."); + if (image_labelname_.empty()) { + RETURN_STATUS_UNEXPECTED("No image found in dataset, please check if Op read images successfully or not."); + } else { + RETURN_STATUS_UNEXPECTED( + "Map for storing image-index pair is nullptr or has been set in other place, " + "it must be empty before using GetClassIds."); + } } for (size_t i = 0; i < image_labelname_.size(); i++) { @@ -272,7 +280,7 @@ Status ManifestOp::GetClassIds(std::map> *cls_ids) Status ManifestOp::ParseManifestFile() { std::ifstream file_handle(file_); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Manifest file " + file_ + " can not open."); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Manifest file: " + file_); } std::string line; while (getline(file_handle, line)) { @@ -297,7 +305,7 @@ Status ManifestOp::ParseManifestFile() { std::string label_name = annotation.value("name", ""); if 
(label_name == "") { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Label name is not found in manifest file for " + image_file_path); + RETURN_STATUS_UNEXPECTED("Invalid data, label name is not found in Manifest file: " + image_file_path); } if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) { if (label_index_.find(label_name) == label_index_.end()) { @@ -311,7 +319,7 @@ Status ManifestOp::ParseManifestFile() { } } catch (const std::exception &err) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Parse manifest file failed"); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + line); } } file_handle.close(); @@ -326,14 +334,14 @@ Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) { *valid = false; file_handle.open(file_name, std::ios::binary | std::ios::in); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Can not open image file " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name); } unsigned char file_type[read_num]; (void)file_handle.read(reinterpret_cast(file_type), read_num); if (file_handle.fail()) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Read image file failed " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name); } file_handle.close(); if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { @@ -364,7 +372,7 @@ Status ManifestOp::CountDatasetInfo() { num_rows_ = static_cast(image_labelname_.size()); if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API " + "Invalid data, no valid data matching the dataset API ManifestDataset. Please check file path or dataset API " "validation first."); } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc index d64c28f7294..15fe89b69b5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc @@ -63,7 +63,7 @@ Status MindRecordOp::Builder::Build(std::shared_ptr *ptr) { if (build_dataset_file_.empty()) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "Building a MindRecordOp that has not provided a file."); + "Invalid file, MindRecord path is invalid or not set."); } mindrecord::json sample_json; if (build_num_padded_ > 0) { @@ -138,13 +138,12 @@ Status MindRecordOp::Init() { auto rc = shard_reader_->Open(dataset_file_, load_dataset_, num_mind_record_workers_, columns_to_load_, operators_, num_padded_); - CHECK_FAIL_RETURN_UNEXPECTED(rc == MSRStatus::SUCCESS, - "MindRecordOp init failed. Error message: " + ErrnoToMessage(rc)); + CHECK_FAIL_RETURN_UNEXPECTED(rc == MSRStatus::SUCCESS, "MindRecordOp init failed, " + ErrnoToMessage(rc)); data_schema_ = std::make_unique(); std::vector col_names = shard_reader_->GetShardColumn()->GetColumnName(); - CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), "No schema found"); + CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), "Invalid data, no column names are specified."); std::vector col_data_types = shard_reader_->GetShardColumn()->GeColumnDataType(); std::vector> col_shapes = shard_reader_->GetShardColumn()->GetColumnShape(); @@ -183,7 +182,8 @@ Status MindRecordOp::Init() { if (!load_all_cols) { std::unique_ptr tmp_schema = std::make_unique(); for (std::string colname : columns_to_load_) { - CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(), colname + ": doesn't exist"); + CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(), + "Invalid parameter, column name: " + colname + " does not exist."); 
RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname]))); } data_schema_ = std::move(tmp_schema); @@ -257,7 +257,7 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(fetched_buffer))); RETURN_IF_NOT_OK(io_blk_queues_[worker_id]->PopFront(&io_block)); } - RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); + RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); } Status MindRecordOp::GetBufferFromReader(std::unique_ptr *fetched_buffer, int64_t buffer_id, @@ -310,23 +310,23 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vectorGetColumnTypeByName(column_name, &column_data_type, &column_data_type_size, &column_shape); if (rc.first != MSRStatus::SUCCESS) { - RETURN_STATUS_UNEXPECTED("Failed to retrieve data type."); + RETURN_STATUS_UNEXPECTED("Invalid parameter, column_name: " + column_name + " does not exist in dataset."); } if (rc.second == mindrecord::ColumnInRaw) { auto has_column = shard_column->GetColumnFromJson(column_name, sample_json_, &data_ptr, &n_bytes); if (has_column == MSRStatus::FAILED) { - RETURN_STATUS_UNEXPECTED("Failed to retrieve raw data from padding sample."); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to retrieve raw data from padding sample."); } } else if (rc.second == mindrecord::ColumnInBlob) { if (sample_bytes_.find(column_name) == sample_bytes_.end()) { - RETURN_STATUS_UNEXPECTED("Failed to retrieve blob data from padding sample."); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to retrieve blob data from padding sample."); } std::string ss(sample_bytes_[column_name]); n_bytes = ss.size(); data_ptr = std::make_unique(n_bytes); std::copy(ss.begin(), ss.end(), data_ptr.get()); } else { - RETURN_STATUS_UNEXPECTED("Retrieved data type is unknown."); + RETURN_STATUS_UNEXPECTED("Invalid data, retrieved data type is unknown."); } if (data == nullptr) { data = reinterpret_cast(data_ptr.get()); @@ 
-336,7 +336,7 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vectorGetColumnValueByName(column_name, columns_blob, columns_json, &data, &data_ptr, &n_bytes, &column_data_type, &column_data_type_size, &column_shape); if (has_column == MSRStatus::FAILED) { - RETURN_STATUS_UNEXPECTED("Failed to retrieve data from mindrecord reader."); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to retrieve data from mindrecord reader."); } } @@ -416,7 +416,7 @@ Status MindRecordOp::Reset() { Status MindRecordOp::LaunchThreadAndInitOp() { if (tree_ == nullptr) { - RETURN_STATUS_UNEXPECTED("tree_ not set"); + RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); } RETURN_IF_NOT_OK(io_blk_queues_.Register(tree_->AllTasks())); @@ -436,7 +436,7 @@ Status MindRecordOp::CountTotalRows(const std::vector dataset_path, std::unique_ptr shard_reader = std::make_unique(); MSRStatus rc = shard_reader->CountTotalRows(dataset_path, load_dataset, op, count, num_padded); if (rc == MSRStatus::FAILED) { - RETURN_STATUS_UNEXPECTED("MindRecordOp count total rows failed."); + RETURN_STATUS_UNEXPECTED("Invalid data, MindRecordOp failed to count total rows."); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc index cc297f07395..24d8635eb5a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc @@ -62,9 +62,15 @@ Status MnistOp::Builder::SanityCheck() { const std::set valid = {"test", "train", "all", ""}; Path dir(builder_dir_); std::string err_msg; - err_msg += dir.IsDirectory() == false ? "MNIST path is invalid or not set\n" : ""; - err_msg += builder_num_workers_ <= 0 ? "Number of parallel workers is set to 0 or negative\n" : ""; - err_msg += valid.find(builder_usage_) == valid.end() ? 
"usage needs to be 'train','test' or 'all'\n" : ""; + err_msg += dir.IsDirectory() == false + ? "Invalid parameter, MNIST path is invalid or not set, path: " + builder_dir_ + ".\n" + : ""; + err_msg += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; + err_msg += valid.find(builder_usage_) == valid.end() + ? "Invalid parameter, usage must be 'train','test' or 'all', but got " + builder_usage_ + ".\n" + : ""; return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -106,7 +112,8 @@ Status MnistOp::operator()() { std::shared_ptr sample_ids; RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0)); if (sample_ids->type() != DataType(DataType::DE_INT64)) { - RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't UINT64"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, data type of Sampler Tensor isn't int64, got " + + sample_ids->type().ToString()); } RETURN_IF_NOT_OK(TraversalSampleIds(sample_ids, &keys)); RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); @@ -159,7 +166,7 @@ Status MnistOp::WorkerEntry(int32_t worker_id) { } RETURN_IF_NOT_OK(io_block_queues_[worker_id]->PopFront(&iOBlock)); } - RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker"); + RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); } // Load 1 TensorRow (image,label) using 1 MnistLabelPair. 
@@ -216,7 +223,13 @@ Status MnistOp::InitSampler() { // Derived from RandomAccessOp Status MnistOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { - RETURN_STATUS_UNEXPECTED("ImageLabelPair not set"); + if (image_label_pairs_.empty()) { + RETURN_STATUS_UNEXPECTED("No image found in dataset, please check if Op read images successfully or not."); + } else { + RETURN_STATUS_UNEXPECTED( + "Map for storing image-index pair is nullptr or has been set in other place, " + "it must be empty before using GetClassIds."); + } } for (size_t i = 0; i < image_label_pairs_.size(); ++i) { (*cls_ids)[image_label_pairs_[i].second].push_back(i); @@ -230,7 +243,7 @@ Status MnistOp::GetClassIds(std::map> *cls_ids) co Status MnistOp::ReadFromReader(std::ifstream *reader, uint32_t *result) { uint32_t res = 0; reader->read(reinterpret_cast(&res), 4); - CHECK_FAIL_RETURN_UNEXPECTED(!reader->fail(), "Failed to read 4 bytes from file"); + CHECK_FAIL_RETURN_UNEXPECTED(!reader->fail(), "Invalid data, failed to read 4 bytes from file."); *result = SwapEndian(res); return Status::OK(); } @@ -241,16 +254,16 @@ uint32_t MnistOp::SwapEndian(uint32_t val) const { } Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_reader, uint32_t *num_images) { - CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(), "Cannot open mnist image file: " + file_name); + CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(), "Invalid file, failed to open mnist image file: " + file_name); int64_t image_len = image_reader->seekg(0, std::ios::end).tellg(); (void)image_reader->seekg(0, std::ios::beg); // The first 16 bytes of the image file are type, number, row and column - CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16, "Mnist file is corrupted."); + CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16, "Invalid file, Mnist file is corrupted: " + file_name); uint32_t magic_number; RETURN_IF_NOT_OK(ReadFromReader(image_reader, 
&magic_number)); CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistImageFileMagicNumber, - "This is not the mnist image file: " + file_name); + "Invalid file, this is not the mnist image file: " + file_name); uint32_t num_items; RETURN_IF_NOT_OK(ReadFromReader(image_reader, &num_items)); @@ -259,25 +272,28 @@ Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_re uint32_t cols; RETURN_IF_NOT_OK(ReadFromReader(image_reader, &cols)); // The image size of the Mnist dataset is fixed at [28,28] - CHECK_FAIL_RETURN_UNEXPECTED((rows == kMnistImageRows) && (cols == kMnistImageCols), "Wrong shape of image."); - CHECK_FAIL_RETURN_UNEXPECTED((image_len - 16) == num_items * rows * cols, "Wrong number of image."); + CHECK_FAIL_RETURN_UNEXPECTED((rows == kMnistImageRows) && (cols == kMnistImageCols), + "Invalid data, shape of image is not equal to (28, 28)."); + CHECK_FAIL_RETURN_UNEXPECTED((image_len - 16) == num_items * rows * cols, + "Invalid data, got truncated data len: " + std::to_string(image_len - 16) + + ", which is not equal to real data len: " + std::to_string(num_items * rows * cols)); *num_images = num_items; return Status::OK(); } Status MnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_reader, uint32_t *num_labels) { - CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), "Cannot open mnist label file: " + file_name); + CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), "Invalid file, failed to open mnist label file: " + file_name); int64_t label_len = label_reader->seekg(0, std::ios::end).tellg(); (void)label_reader->seekg(0, std::ios::beg); // The first 8 bytes of the image file are type and number - CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Mnist file is corrupted."); + CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid file, Mnist file is corrupted: " + file_name); uint32_t magic_number; RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number)); CHECK_FAIL_RETURN_UNEXPECTED(magic_number == 
kMnistLabelFileMagicNumber, - "This is not the mnist label file: " + file_name); + "Invalid file, this is not the mnist label file: " + file_name); uint32_t num_items; RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items)); - CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Wrong number of labels!"); + CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid data, number of labels is wrong."); *num_labels = num_items; return Status::OK(); } @@ -286,23 +302,25 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la uint32_t num_images, num_labels; RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images)); RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels)); - CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), "num_images != num_labels"); + CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), "Invalid data, num_images is not equal to num_labels."); // The image size of the Mnist dataset is fixed at [28,28] int64_t size = kMnistImageRows * kMnistImageCols; auto images_buf = std::make_unique(size * num_images); auto labels_buf = std::make_unique(num_images); if (images_buf == nullptr || labels_buf == nullptr) { - std::string err_msg = "Fail to allocate memory for MNIST Buffer."; + std::string err_msg = "Failed to allocate memory for MNIST buffer."; MS_LOG(ERROR) << err_msg.c_str(); RETURN_STATUS_UNEXPECTED(err_msg); } (void)image_reader->read(images_buf.get(), size * num_images); if (image_reader->fail()) { - RETURN_STATUS_UNEXPECTED("Fail to read:" + image_names_[index] + " size:" + std::to_string(size * num_images)); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to read image: " + image_names_[index] + + ", size:" + std::to_string(size * num_images)); } (void)label_reader->read(labels_buf.get(), num_images); if (label_reader->fail()) { - RETURN_STATUS_UNEXPECTED("Fail to read:" + label_names_[index] + " size: " + std::to_string(num_images)); + 
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read label: " + label_names_[index] + + ", size: " + std::to_string(num_images)); } TensorShape img_tensor_shape = TensorShape({kMnistImageRows, kMnistImageCols, 1}); for (int64_t j = 0; j != num_images; ++j) { @@ -337,7 +355,7 @@ Status MnistOp::ParseMnistData() { num_rows_ = image_label_pairs_.size(); if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API MnistDataset.Please check file path or dataset API " + "Invalid data, no valid data matching the dataset API MnistDataset. Please check file path or dataset API " "validation first."); } return Status::OK(); @@ -372,14 +390,15 @@ Status MnistOp::WalkAllFiles() { std::sort(image_names_.begin(), image_names_.end()); std::sort(label_names_.begin(), label_names_.end()); - CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(), "num of idx3 files != num of idx1 files"); + CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(), + "Invalid data, num of images does not equal to num of labels."); return Status::OK(); } Status MnistOp::LaunchThreadsAndInitOp() { if (tree_ == nullptr) { - RETURN_STATUS_UNEXPECTED("tree_ not set"); + RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); @@ -409,7 +428,8 @@ Status MnistOp::CountTotalRows(const std::string &dir, const std::string &usage, RETURN_IF_NOT_OK(op->CheckImage(op->image_names_[i], &image_reader, &num_images)); uint32_t num_labels; RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels)); - CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), "num of images does not equal to num of labels"); + CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), + "Invalid data, num of images is not equal to num of labels."); *count = *count + num_images; // Close the readers diff --git 
a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc index 407cb0ac22f..4e63ce987d9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc @@ -41,10 +41,14 @@ Status DistributedSampler::InitSampler() { if (num_samples_ == 0 || num_samples_ > num_rows_) { num_samples_ = num_rows_; } - CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "num_samples <= 0\n"); - CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "num_rows <= 0\n"); - CHECK_FAIL_RETURN_UNEXPECTED(device_id_ < num_devices_ && device_id_ >= 0 && num_rows_ > 0 && num_samples_ > 0, - "fail to init DistributedSampler"); + CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "Invalid parameter, num_samples must be greater than 0, but got " + + std::to_string(num_samples_) + ".\n"); + CHECK_FAIL_RETURN_UNEXPECTED( + num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0" + std::to_string(num_rows_) + ".\n"); + CHECK_FAIL_RETURN_UNEXPECTED( + device_id_ < num_devices_ && device_id_ >= 0 && num_rows_ > 0 && num_samples_ > 0, + "Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " + + std::to_string(num_devices_) + ", shard_id: " + std::to_string(device_id_) + ".\n"); rnd_.seed(seed_++); if (offset_ != -1 || !even_dist_) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc index 770c24c8c57..9ec0ef2aad4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc @@ -55,13 +55,15 @@ Status PKSampler::InitSampler() { } else { std::sort(labels_.begin(), labels_.end()); } - 
CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "num_class or K (num samples per class) is not positive"); + CHECK_FAIL_RETURN_UNEXPECTED( + num_samples_ > 0, "Invalid parameter, num_class or K (num samples per class) must be greater than 0, but got " + + std::to_string(num_samples_)); return Status::OK(); } Status PKSampler::GetNextSample(std::unique_ptr *out_buffer) { if (next_id_ > num_samples_ || num_samples_ == 0) { - RETURN_STATUS_UNEXPECTED("Index out of bound in PKSampler"); + RETURN_STATUS_UNEXPECTED("Index must be less than or equal to num_samples, but got: " + std::to_string(next_id_)); } else if (next_id_ == num_samples_) { (*out_buffer) = std::make_unique(0, DataBuffer::kDeBFlagEOE); } else { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc index 998dee2a070..29620841f16 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc @@ -74,7 +74,10 @@ Status RandomSampler::InitSampler() { if (num_samples_ == 0 || num_samples_ > num_rows_) { num_samples_ = num_rows_; } - CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && num_rows_ > 0, "both num_samples & num_rows need to be positive"); + CHECK_FAIL_RETURN_UNEXPECTED( + num_samples_ > 0 && num_rows_ > 0, + "Invalid parameter, num_samples & num_rows must be greater than 0, but got num_samples: " + + std::to_string(num_samples_) + ", num_rows: " + std::to_string(num_rows_)); samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? 
num_samples_ : samples_per_buffer_; rnd_.seed(seed_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc index f13e8122c80..4498455e4fe 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc @@ -66,7 +66,7 @@ Status Sampler::HandshakeRandomAccessOp(const RandomAccessOp *op) { Status Sampler::CreateSamplerTensor(std::shared_ptr *sample_ids, int64_t num_elements) { if (num_elements == 0) { - RETURN_STATUS_UNEXPECTED("num of Elements is 0"); + RETURN_STATUS_UNEXPECTED("Invalid data, num of elements cannot be 0."); } if (col_desc_ == nullptr) { // a ColDescriptor for Tensor that holds SampleIds @@ -124,13 +124,13 @@ Status Sampler::GetAllIdsThenReset(py::array *data) { #endif Status Sampler::SetNumSamples(int64_t num_samples) { - CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative"); + CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "Invalid parameter, num_samples must be greater than or equal to 0."); num_samples_ = num_samples; return Status::OK(); } Status Sampler::SetNumRowsInDataset(int64_t num_rows) { - CHECK_FAIL_RETURN_UNEXPECTED(num_rows > 0, "num_rows is negative or 0"); + CHECK_FAIL_RETURN_UNEXPECTED(num_rows > 0, "Invalid parameter, num_rows must be greater than 0."); num_rows_ = num_rows; return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc index 1cc4ac831a4..ced0ed1eeae 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc @@ -63,16 +63,24 @@ Status SequentialSampler::GetNextSample(std::unique_ptr 
*out_buffer) } Status SequentialSampler::InitSampler() { - CHECK_FAIL_RETURN_UNEXPECTED(start_index_ >= 0, "start_index < 0\n"); - CHECK_FAIL_RETURN_UNEXPECTED(start_index_ < num_rows_, "start_index >= num_rows\n"); - CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ >= 0, "num_samples < 0\n"); + CHECK_FAIL_RETURN_UNEXPECTED(start_index_ >= 0, + "Invalid parameter, start_index must be greater than or equal to 0, but got " + + std::to_string(start_index_) + ".\n"); + CHECK_FAIL_RETURN_UNEXPECTED(start_index_ < num_rows_, + "Invalid parameter, start_index must be less than num_rows, but got start_index: " + + std::to_string(start_index_) + ", num_rows: " + std::to_string(num_rows_) + ".\n"); + CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ >= 0, + "Invalid parameter, num_samples must be greater than or equal to 0, but got " + + std::to_string(num_samples_) + ".\n"); // Adjust the num_samples count based on the range of ids we are sequencing. If num_samples is 0, we sample // the entire set. If it's non-zero, we will implicitly cap the amount sampled based on available data. int64_t available_row_count = num_rows_ - start_index_; if (num_samples_ == 0 || num_samples_ > available_row_count) { num_samples_ = available_row_count; } - CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && samples_per_buffer_ > 0, "Fail to init Sequential Sampler"); + CHECK_FAIL_RETURN_UNEXPECTED( + num_samples_ > 0 && samples_per_buffer_ > 0, + "Invalid parameter, samples_per_buffer must be greater than 0, but got " + std::to_string(samples_per_buffer_)); samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? 
num_samples_ : samples_per_buffer_; return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc index db2078795e7..3a9a0b418e9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc @@ -33,7 +33,8 @@ SubsetRandomSampler::SubsetRandomSampler(int64_t num_samples, const std::vector< // Initialized this Sampler. Status SubsetRandomSampler::InitSampler() { - CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "num_rows <= 0\n"); + CHECK_FAIL_RETURN_UNEXPECTED( + num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n"); // Special value of 0 for num_samples means that the user wants to sample the entire set of data. // In this case, the id's are provided by the user. Cap the num_samples on the number of id's given. 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc index 47c2c8b0d25..98aec562a9c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc @@ -42,14 +42,19 @@ Status WeightedRandomSampler::InitSampler() { if (num_samples_ == 0 || num_samples_ > num_rows_) { num_samples_ = num_rows_; } - CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0 && num_samples_, "num_samples & num_rows need to be positive"); - CHECK_FAIL_RETURN_UNEXPECTED(samples_per_buffer_ > 0, "samples_per_buffer<=0\n"); + CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0 && num_samples_, + "Invalid parameter, num_samples & num_rows must be greater than 0, but got num_rows: " + + std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_)); + CHECK_FAIL_RETURN_UNEXPECTED(samples_per_buffer_ > 0, + "Invalid parameter, samples_per_buffer must be greater than 0, but got " + + std::to_string(samples_per_buffer_) + ".\n"); if (weights_.size() > static_cast(num_rows_)) { return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, - "number of samples weights is more than num of rows. Might generate id out of bound OR other errors"); + "Invalid parameter, number of samples weights is more than num of rows. 
" + "Might generate id out of bound OR other errors"); } if (!replacement_ && (weights_.size() < static_cast(num_samples_))) { - RETURN_STATUS_UNEXPECTED("Without replacement, sample weights less than numSamples"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, without replacement, weights size must be greater than num_samples."); } // Initialize random generator with seed from config manager diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc index 4f262334003..3d9b79a7dbf 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc @@ -47,8 +47,13 @@ TextFileOp::Builder::Builder() Status TextFileOp::Builder::ValidateInputs() const { std::string err_msg; - err_msg += builder_num_workers_ <= 0 ? "Number of parallel workers should be greater than 0\n" : ""; - err_msg += builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1 ? "Wrong sharding configs\n" : ""; + err_msg += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; + err_msg += (builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1) + ? "Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " + + std::to_string(builder_num_devices_) + ", shard_id: " + std::to_string(builder_device_id_) + ".\n" + : ""; return err_msg.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -153,7 +158,7 @@ Status TextFileOp::LoadFile(const std::string &file, const int64_t start_offset, const int32_t worker_id) { std::ifstream handle(file); if (!handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Failed to open file " + file); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + file); } int64_t rows_each_buffer = 0; @@ -442,7 +447,7 @@ Status TextFileOp::operator()() { int64_t TextFileOp::CountTotalRows(const std::string &file) { std::ifstream handle(file); if (!handle.is_open()) { - MS_LOG(ERROR) << "Failed to open file: " << file; + MS_LOG(ERROR) << "Invalid file, failed to open file: " << file; return 0; } @@ -465,7 +470,7 @@ Status TextFileOp::CalculateNumRowsPerShard() { } if (all_num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API TextFileDataset.Please check file path or dataset API " + "Invalid data, no valid data matching the dataset API TextFileDataset.Please check file path or dataset API " "validation first."); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc index bcfa045796b..ffc2a97ef7c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc @@ -88,11 +88,13 @@ Status TFReaderOp::Builder::ValidateInputs() const { std::string err_msg; if (builder_num_workers_ <= 0) { - err_msg += "Number of parallel workers is smaller or equal to 0\n"; + err_msg += "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n"; } if (builder_device_id_ >= builder_num_devices_ || builder_num_devices_ < 1) { - err_msg += "Wrong sharding configs\n"; + err_msg += "Invalid parameter, num_shard must be greater than shard_id and greater 
than 0, got num_shard: " + + std::to_string(builder_num_devices_) + ", shard_id: " + std::to_string(builder_device_id_) + ".\n"; } std::vector invalid_files(builder_dataset_files_list_.size()); @@ -101,7 +103,7 @@ Status TFReaderOp::Builder::ValidateInputs() const { invalid_files.resize(std::distance(invalid_files.begin(), it)); if (!invalid_files.empty()) { - err_msg += "The following files either cannot be opened, or are not valid tfrecord files:\n"; + err_msg += "Invalid file, the following files either cannot be opened, or are not valid tfrecord files:\n"; std::string accumulated_filenames = std::accumulate( invalid_files.begin(), invalid_files.end(), std::string(""), @@ -193,7 +195,7 @@ Status TFReaderOp::Init() { total_rows_ = data_schema_->num_rows(); } if (total_rows_ < 0) { - RETURN_STATUS_UNEXPECTED("The num_sample or numRows for TFRecordDataset should be greater than 0"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, num_sample or num_row for TFRecordDataset must be greater than 0."); } // Build the index with our files such that each file corresponds to a key id. 
@@ -227,7 +229,7 @@ Status TFReaderOp::CalculateNumRowsPerShard() { num_rows_per_shard_ = static_cast(std::ceil(num_rows_ * 1.0 / num_devices_)); if (num_rows_per_shard_ == 0) { RETURN_STATUS_UNEXPECTED( - "There is no valid data matching the dataset API TFRecordDataset.Please check file path or dataset API " + "Invalid data, no valid data matching the dataset API TFRecordDataset.Please check file path or dataset API " "validation first."); } return Status::OK(); @@ -569,7 +571,7 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off std::ifstream reader; reader.open(filename); if (!reader) { - RETURN_STATUS_UNEXPECTED("failed to open file: " + filename); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + filename); } int64_t rows_read = 0; @@ -597,7 +599,7 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off if (start_offset == kInvalidOffset || (rows_total >= start_offset && rows_total < end_offset)) { dataengine::Example tf_file; if (!tf_file.ParseFromString(serialized_example)) { - std::string errMsg = "parse tfrecord failed"; + std::string errMsg = "Invalid file, failed to parse tfrecord file : " + serialized_example; RETURN_STATUS_UNEXPECTED(errMsg); } RETURN_IF_NOT_OK(LoadExample(&tf_file, &new_tensor_table, rows_read)); @@ -639,7 +641,7 @@ Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, std::unique_p const google::protobuf::Map &feature_map = example_features.feature(); auto iter_column = feature_map.find(current_col.name()); if (iter_column == feature_map.end()) { - RETURN_STATUS_UNEXPECTED("key not found: " + current_col.name()); + RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.name() + "does not exist."); } const dataengine::Feature &column_values_list = iter_column->second; RETURN_IF_NOT_OK(LoadFeature(tensor_table, column_values_list, current_col, row, col)); @@ -690,11 +692,11 @@ Status TFReaderOp::LoadFeature(const std::unique_ptr 
*tensor_table break; } case dataengine::Feature::KindCase::KIND_NOT_SET: { - std::string err_msg = "tf_file column list type enum is KIND_NOT_SET"; + std::string err_msg = "Invalid data, tf_file column type must be uint8, int64 or float32."; RETURN_STATUS_UNEXPECTED(err_msg); } default: { - std::string err_msg = "tf_file column list type enum does not match any known DE type"; + std::string err_msg = "Invalid data, tf_file column type must be uint8, int64 or float32."; RETURN_STATUS_UNEXPECTED(err_msg); } } @@ -728,7 +730,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng // Must be single byte type for each element! if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8 && current_col.type() != DataType::DE_STRING) { - std::string err_msg = "Invalid datatype for Tensor at column: " + current_col.name(); + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + + ", data type should be int8, uint8 or string, but got " + current_col.type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -755,7 +758,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng int64_t new_pad_size = 1; for (int i = 1; i < cur_shape.Size(); ++i) { if (cur_shape[i] == TensorShape::kDimUnknown) { - std::string err_msg = "More than one unknown dimension in the shape of column: " + current_col.name(); + std::string err_msg = + "Invalid data, more than one unknown dimension in the shape of column: " + current_col.name(); RETURN_STATUS_UNEXPECTED(err_msg); } new_pad_size *= cur_shape[i]; @@ -777,7 +781,8 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng // KFloatList can only map to DE types: // DE_FLOAT32 if (current_col.type() != DataType::DE_FLOAT32) { - std::string err_msg = "Invalid datatype for Tensor at column: " + current_col.name(); + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + 
current_col.name() + + ", data type should be string, but got " + current_col.type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -814,7 +819,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dat } else if (current_col.type() == DataType::DE_INT8) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); } else { - std::string err_msg = "Invalid datatype for Tensor at column: " + current_col.name(); + std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.name() + + ", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" + + ", but got " + current_col.type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -827,7 +834,8 @@ template Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, int32_t *num_elements, std::shared_ptr *tensor) { if (!(current_col.type().IsInt())) { - std::string err_msg = "Invalid datatype for Tensor at column: " + current_col.name(); + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + + ", data type should be int, but got " + current_col.type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -869,7 +877,9 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector(record_length)); dataengine::Example example; - if (!example.ParseFromString(serialized_example)) RETURN_STATUS_UNEXPECTED("parse tf_file failed"); + if (!example.ParseFromString(serialized_example)) { + RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse tfrecord file: " + serialized_example); + } const dataengine::Features &example_features = example.features(); const google::protobuf::Map &feature_map = example_features.feature(); @@ -883,7 +893,7 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vectorfirst; @@ -905,11 +915,10 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, 
std::vector(num_devices_)) { - RETURN_STATUS_UNEXPECTED("Not enough tfrecord files provided\n"); + RETURN_STATUS_UNEXPECTED("Invalid file, not enough tfrecord files provided.\n"); } } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc index 5d375f26c0a..a12bddb57be 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc @@ -91,8 +91,12 @@ Status VOCOp::Builder::Build(std::shared_ptr *ptr) { Status VOCOp::Builder::SanityCheck() { Path dir(builder_dir_); std::string err_msg; - err_msg += dir.IsDirectory() == false ? "VOC path is invalid or not set\n" : ""; - err_msg += builder_num_workers_ <= 0 ? "Num of parallel workers is set to 0 or negative\n" : ""; + err_msg += dir.IsDirectory() == false + ? "Invalid parameter, VOC path is invalid or not set, path: " + builder_dir_ + ".\n" + : ""; + err_msg += builder_num_workers_ <= 0 ? "Invalid parameter, num_parallel_workers must be greater than 0, but got " + + std::to_string(builder_num_workers_) + ".\n" + : ""; return err_msg.empty() ? 
Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg); } @@ -137,7 +141,8 @@ Status VOCOp::operator()() { std::shared_ptr sample_ids; RETURN_IF_NOT_OK(sampler_buffer->GetTensor(&sample_ids, 0, 0)); if (sample_ids->type() != DataType(DataType::DE_INT64)) { - RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, data type of Sampler Tensor isn't int64, got " + + sample_ids->type().ToString()); } RETURN_IF_NOT_OK(TraverseSampleIds(sample_ids, &keys)); RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer)); @@ -259,7 +264,7 @@ Status VOCOp::ParseImageIds() { std::ifstream in_file; in_file.open(image_sets_file); if (in_file.fail()) { - RETURN_STATUS_UNEXPECTED("Fail to open file: " + image_sets_file); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + image_sets_file); } std::string id; while (getline(in_file, id)) { @@ -301,21 +306,21 @@ Status VOCOp::ParseAnnotationIds() { Status VOCOp::ParseAnnotationBbox(const std::string &path) { if (!Path(path).Exists()) { - RETURN_STATUS_UNEXPECTED("File is not found : " + path); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path); } Annotation annotation; XMLDocument doc; XMLError e = doc.LoadFile(common::SafeCStr(path)); if (e != XMLError::XML_SUCCESS) { - RETURN_STATUS_UNEXPECTED("Xml load failed"); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to load xml file: " + path); } XMLElement *root = doc.RootElement(); if (root == nullptr) { - RETURN_STATUS_UNEXPECTED("Xml load root element error"); + RETURN_STATUS_UNEXPECTED("Invalid data, failed to load root element for xml file."); } XMLElement *object = root->FirstChildElement("object"); if (object == nullptr) { - RETURN_STATUS_UNEXPECTED("No object find in " + path); + RETURN_STATUS_UNEXPECTED("Invalid data, no object found in " + path); } while (object != nullptr) { std::string label_name; @@ -338,7 +343,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string 
&path) { XMLElement *ymax_node = bbox_node->FirstChildElement("ymax"); if (ymax_node != nullptr) ymax = ymax_node->FloatText(); } else { - RETURN_STATUS_UNEXPECTED("bndbox dismatch in " + path); + RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path); } if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin) { @@ -359,7 +364,7 @@ Status VOCOp::InitSampler() { Status VOCOp::LaunchThreadsAndInitOp() { if (tree_ == nullptr) { - RETURN_STATUS_UNEXPECTED("tree_ not set"); + RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); } RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks())); @@ -378,7 +383,7 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co if (decode_ == true) { Status rc = Decode(*tensor, tensor); if (rc.IsError()) { - RETURN_STATUS_UNEXPECTED("fail to decode file: " + path); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to decode file: " + path); } } return Status::OK(); @@ -402,7 +407,9 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) { } else { label_data.push_back(static_cast(label_index_[item.first])); } - CHECK_FAIL_RETURN_UNEXPECTED(item.second.size() == 6, "annotation only support 6 parameters."); + CHECK_FAIL_RETURN_UNEXPECTED( + item.second.size() == 6, + "Invalid parameter, annotation only support 6 parameters, but got " + std::to_string(item.second.size())); std::vector tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]}; bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end()); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc index f754b4898a9..615dcaea41c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc +++ 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc @@ -34,7 +34,7 @@ TakeOp::Builder::Builder(int32_t count) : build_max_takes_(count) { Status TakeOp::Builder::SanityCheck() const { if (build_max_takes_ <= 0) { - std::string err_msg("Take count must be greater than 0."); + std::string err_msg("Invalid parameter, take count must be greater than 0."); RETURN_STATUS_UNEXPECTED(err_msg); } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc index 1b6a0ecb790..cee51bbe1ae 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc @@ -122,7 +122,8 @@ Status ZipOp::prepare(TensorQTable *const table) { draining_ = false; buffer_id_ = 0; if (table == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "ZipOp prepare phase requires a tensor table."); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Invalid data, ZipOp prepare phase requires a tensor table, but got nullptr."); } // fill initial row TensorRow new_row; @@ -146,7 +147,8 @@ Status ZipOp::prepare(TensorQTable *const table) { // fillBuffer always expects a new table to fill Status ZipOp::fillBuffer(TensorQTable *const table) { if (table == nullptr) { - return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "ZipOp fillBuffer null table pointer."); + return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, + "Invalid data, ZipOp fillBuffer null table pointer."); } TensorRow new_row; while (table->size() < static_cast(rows_per_buffer_)) { @@ -252,7 +254,7 @@ Status ZipOp::ComputeColMap() { int32_t old_id = pair.second; // check if name already exists in column name descriptor if (column_name_id_map_.count(name) == 1) { - RETURN_STATUS_UNEXPECTED("key already exists when zipping datasets"); + RETURN_STATUS_UNEXPECTED("Invalid parameter, key: " + name + " 
already exists when zipping datasets."); } column_name_id_map_[name] = old_id + colsCurrent; } diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc index 802731b8fc1..3d19b991aad 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/lookup_op.cc @@ -35,7 +35,7 @@ Status LookupOp::Compute(const std::shared_ptr &input, std::shared_ptrshape(), output)); diff --git a/mindspore/ccsrc/minddata/dataset/text/vocab.cc b/mindspore/ccsrc/minddata/dataset/text/vocab.cc index 8bd4449d6c2..35639d8b2d6 100644 --- a/mindspore/ccsrc/minddata/dataset/text/vocab.cc +++ b/mindspore/ccsrc/minddata/dataset/text/vocab.cc @@ -136,6 +136,11 @@ Status Vocab::BuildFromFileCpp(const std::string &path, const std::string &delim const std::vector &special_tokens, bool prepend_special, std::shared_ptr *vocab) { // Validate parameters + if (path.empty()) { + MS_LOG(ERROR) << "vocab file path is not set!"; + RETURN_STATUS_UNEXPECTED("vocab file path is not set!"); + } + if (vocab_size < 0 && vocab_size != -1) { MS_LOG(ERROR) << "vocab_size shoule be either -1 or positive integer, but got " << vocab_size; RETURN_STATUS_UNEXPECTED("vocab_size shoule be either -1 or positive integer, but got " + diff --git a/tests/ut/python/dataset/test_datasets_cifarop.py b/tests/ut/python/dataset/test_datasets_cifarop.py index 24193199b2a..a073be084b9 100644 --- a/tests/ut/python/dataset/test_datasets_cifarop.py +++ b/tests/ut/python/dataset/test_datasets_cifarop.py @@ -210,7 +210,7 @@ def test_cifar10_exception(): with pytest.raises(ValueError, match=error_msg_6): ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=88) - error_msg_7 = "No .bin files found" + error_msg_7 = "no .bin files found" with pytest.raises(RuntimeError, match=error_msg_7): ds1 = ds.Cifar10Dataset(NO_BIN_DIR) for _ in ds1.__iter__(): @@ -360,7 +360,7 @@ def 
test_cifar100_exception(): with pytest.raises(ValueError, match=error_msg_6): ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=88) - error_msg_7 = "No .bin files found" + error_msg_7 = "no .bin files found" with pytest.raises(RuntimeError, match=error_msg_7): ds1 = ds.Cifar100Dataset(NO_BIN_DIR) for _ in ds1.__iter__(): diff --git a/tests/ut/python/dataset/test_datasets_coco.py b/tests/ut/python/dataset/test_datasets_coco.py index 0f7dccfe301..e27b6973eaf 100644 --- a/tests/ut/python/dataset/test_datasets_coco.py +++ b/tests/ut/python/dataset/test_datasets_coco.py @@ -258,7 +258,7 @@ def test_coco_case_exception(): pass assert False except RuntimeError as e: - assert "Invalid node found in json" in str(e) + assert "invalid node found in json" in str(e) try: data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_CATEGORY_ID_FILE, task="Detection") diff --git a/tests/ut/python/dataset/test_datasets_csv.py b/tests/ut/python/dataset/test_datasets_csv.py index 600885c3ec0..9907fa0bf8b 100644 --- a/tests/ut/python/dataset/test_datasets_csv.py +++ b/tests/ut/python/dataset/test_datasets_csv.py @@ -205,7 +205,7 @@ def test_csv_dataset_exception(): with pytest.raises(Exception) as err: for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): pass - assert "Failed to parse file" in str(err.value) + assert "failed to parse file" in str(err.value) def test_csv_dataset_type_error(): diff --git a/tests/ut/python/dataset/test_datasets_generator.py b/tests/ut/python/dataset/test_datasets_generator.py index 2c4342beba9..5e8d1851252 100644 --- a/tests/ut/python/dataset/test_datasets_generator.py +++ b/tests/ut/python/dataset/test_datasets_generator.py @@ -497,6 +497,7 @@ def test_generator_error_2(): data1 = ds.GeneratorDataset(generator_np, ["data"]) for _ in data1: pass + print("========", str(info.value)) assert "Generator should return a tuple of numpy arrays" in str(info.value) diff --git a/tests/ut/python/dataset/test_skip.py 
b/tests/ut/python/dataset/test_skip.py index 242ad7ac63e..8a1ce02cb5f 100644 --- a/tests/ut/python/dataset/test_skip.py +++ b/tests/ut/python/dataset/test_skip.py @@ -210,7 +210,7 @@ def test_skip_exception_1(): except RuntimeError as e: logger.info("Got an exception in DE: {}".format(str(e))) - assert "Skip count must be positive integer or 0." in str(e) + assert "skip count should be greater than or equal to 0." in str(e) def test_skip_exception_2():