diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc index ec6df28606f..d494b7c34e4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc @@ -90,7 +90,7 @@ Status BarrierOp::blockCond() { { py::gil_scoped_acquire gil_acquire; if (Py_IsInitialized() == 0) { - return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); + return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); } // we have condition name, however the flexibility is in python today try { @@ -99,7 +99,8 @@ Status BarrierOp::blockCond() { // Process the return value if (!py::isinstance(ret_py_obj)) { return Status(StatusCode::kMDPyFuncException, - "Invalid parameter, condition wait function should return true/false."); + "Invalid parameter, condition wait function should return boolean, but got " + + std::string(ret_py_obj.get_type().str())); } } catch (const py::error_already_set &e) { return Status(StatusCode::kMDPyFuncException, e.what()); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc index a97e68dd6c7..0ffeb04b936 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc @@ -134,7 +134,7 @@ Status BatchOp::operator()() { if ((num_workers_ > 1 || batch_map_func_) && GetMemoryUsage() > MAX_MEMORY_USAGE_THRESHOLD) { MS_LOG(WARNING) << "Memory consumption is more than " << (GetMemoryUsage() * 100) << "%, " << "which may cause oom error. Please reduce num_parallel_workers size / " - << "optimize per_batch_map function / other python data preprocess function to " + << "optimize 'per_batch_map' function / other python data preprocess function to " << "reduce memory usage."; } #endif @@ -203,8 +203,9 @@ Status BatchOp::BatchRows(const std::unique_ptr *src, TensorRow *d first_shape.Print(shape1); old_tensor->shape().Print(shape2); RETURN_STATUS_UNEXPECTED( - "Invalid data, batch operation expect same shape for each data row, but got inconsistent shape in column " + - std::to_string(i) + " expected shape for this column is:" + shape1.str() + ", got shape:" + shape2.str()); + "Inconsistent batch shapes, batch operation expect same shape for each data row, " + "but got inconsistent shape in column " + + std::to_string(i) + ", expected shape for this column is:" + shape1.str() + ", got shape:" + shape2.str()); } } } else { // handle string column differently @@ -300,7 +301,7 @@ Status BatchOp::MapColumns(std::pair, CBatchInfo> CHECK_FAIL_RETURN_UNEXPECTED(num_rows == out_cols[i].size(), "Invalid data, column: " + out_col_names_[i] + " expects: " + std::to_string(num_rows) + - " rows returned from per_batch_map, got: " + std::to_string(out_cols[i].size())); + " rows returned from 'per_batch_map', got: " + std::to_string(out_cols[i].size())); for (auto &t_row : *out_q_table) { t_row[col_id] = out_cols[i][row_id++]; } @@ -339,14 +340,16 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { *batch_size = size.cast(); if (*batch_size <= 0) { return Status(StatusCode::kMDPyFuncException, - "Invalid parameter, batch_size function should return an integer greater than 0, but got: " + + "Invalid batch_size function, 'batch_size' function should return an integer greater than 0, 
" + "but got: " + std::to_string(*batch_size)); } } catch (const py::error_already_set &e) { return Status(StatusCode::kMDPyFuncException, e.what()); } catch (const py::cast_error &e) { - return Status(StatusCode::kMDPyFuncException, - "Invalid parameter, batch_size function should return an integer greater than 0."); + return Status( + StatusCode::kMDPyFuncException, + "Invalid batch_size function, the return value of batch_size function cast failed: " + std::string(e.what())); } } return Status(StatusCode::kSuccess, "batch_size function call succeeded."); @@ -379,11 +382,13 @@ Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBat // Parse batch map return value py::tuple ret_tuple = py::cast(ret_py_obj); CHECK_FAIL_RETURN_UNEXPECTED(py::isinstance(ret_tuple), - "per_batch_map function should return a tuple."); + "Invalid per_batch_map, 'per_batch_map' function should return a tuple, but got " + + std::string(ret_py_obj.get_type().str())); CHECK_FAIL_RETURN_UNEXPECTED(ret_tuple.size() == out_col_names_.size(), - "Incorrect number of columns returned in per_batch_map function. Expects: " + + "Invalid per_batch_map, the number of columns returned in 'per_batch_map' function " + "should be " + std::to_string(out_col_names_.size()) + - " got: " + std::to_string(ret_tuple.size())); + " , but got: " + std::to_string(ret_tuple.size())); for (size_t i = 0; i < ret_tuple.size(); i++) { TensorRow output_batch; // If user returns a type that is neither a list nor an array, issue a error msg. @@ -405,7 +410,8 @@ Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBat return Status(StatusCode::kMDPyFuncException, e.what()); } catch (const py::cast_error &e) { return Status(StatusCode::kMDPyFuncException, - "Invalid parameter, per_batch_map function of batch should return a tuple of list of numpy array."); + "Invalid per_batch_map, the return value of 'per_batch_map' function cast to py::tuple failed: " + + std::string(e.what())); } } return Status::OK(); @@ -432,7 +438,7 @@ Status BatchOp::PadColumns(std::unique_ptr *table, const PadInfo & if (pad_shapes[col_id].empty()) pad_shapes[col_id] = max_shapes[col_id]; // fill pad shape with -1 CHECK_FAIL_RETURN_UNEXPECTED( pad_shapes[col_id].size() == max_shapes[col_id].size(), - "Invalid data, rank of pad_shape must be equal to rank of specified column. pad_shapes rank:" + + "Invalid pad_info, rank of pad_shape must be equal to rank of specified column. 
pad_shapes rank:" + std::to_string(pad_shapes[col_id].size()) + ", column rank: " + std::to_string(max_shapes[col_id].size())); } @@ -482,12 +488,14 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, for (const auto &p : pad_info) { auto location = column_name_id_map.find(p.first); CHECK_FAIL_RETURN_UNEXPECTED(location != column_name_id_map.end(), - "Invalid parameter, column name: " + p.first + " does not exist."); + "Invalid pad_info, column name: " + p.first + " does not exist."); auto col_id = static_cast(location->second); CHECK_FAIL_RETURN_UNEXPECTED( col_id < pad_vals->size() && col_id < pad_shapes->size(), - "Invalid parameter, column id must be less than the size of pad_val and pad_shape, but got: " + - std::to_string(col_id)); + "Invalid pad_info, column name should be match with the size of pad value and pad shape, but got " + "column name: " + + p.first + ", the size of pad value: " + std::to_string(pad_vals->size()) + + " and the size of pad shape: " + std::to_string(pad_shapes->size()) + "."); pad_cols->insert(col_id); (*pad_vals)[col_id] = p.second.second; // set pad values (*pad_shapes)[col_id] = p.second.first.AsVector(); // empty vector if shape is unknown @@ -498,8 +506,9 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, Status BatchOp::ComputeColMap() { CHECK_FAIL_RETURN_UNEXPECTED(child_.size() == 1, - "Invalid data, batch operator can't be used as a single operator, " - "should be preceded by an operator that reads data, for example, ImageFolderDataset."); + "Invalid batch, batch operator can't be used as a single operator, " + "should be preceded by an operator that reads data, for example, " + "ds1 = ds.ImageFolderDataset().batch()."); CHECK_FAIL_RETURN_UNEXPECTED(!(child_[0]->column_name_id_map().empty()), "Invalid data, the column of the previous operator of the batch cannot be empty."); @@ -514,7 +523,7 @@ Status BatchOp::ComputeColMap() { // check all input columns exist for (const auto &col : in_col_names_) { CHECK_FAIL_RETURN_UNEXPECTED(child_map_.find(col) != child_map_.end(), - "Invalid parameter, col:" + col + " doesn't exist in dataset."); + "Invalid input_columns, '" + col + "' of 'input_columns' doesn't exist."); } // following logic deals with per_batch_map @@ -551,8 +560,21 @@ Status BatchOp::ComputeColMap() { } } - CHECK_FAIL_RETURN_UNEXPECTED(column_name_id_map_.size() == (child_map_no_in_col.size() + out_col_names_.size()), - "Key error in column_name_id_map_. 
output_columns in batch is not set correctly!"); + if (column_name_id_map_.size() != (child_map_no_in_col.size() + out_col_names_.size())) { + const std::string prefix_str = std::string("["); + auto column_no_in_col = std::accumulate( + child_map_no_in_col.begin(), child_map_no_in_col.end(), prefix_str, + [](const std::string &str, const std::pair &p) { return str + p.first + ","; }); + column_no_in_col += "]"; + auto column_out = + std::accumulate(out_col_names_.begin(), out_col_names_.end(), prefix_str, + [](const std::string &str, const std::string &out_col) { return str + out_col + ","; }); + column_out += "]"; + RETURN_STATUS_UNEXPECTED( + "Invalid output_columns, columns that are not involved in 'per_batch_map' should not be " + "in output_columns, but got columns that are not in input_columns: " + + column_no_in_col + ", output_columns: " + column_out + "."); + } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc index 5d8ea08bfc4..9dd61ff7859 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc @@ -108,7 +108,7 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T for (size_t i = 0; i < number_of_arguments; i++) { auto map_item = column_name_id_map_.find(length_dependent_columns_[i]); if (map_item == column_name_id_map_.end()) { - RETURN_STATUS_UNEXPECTED("BucketBatchByLength: Couldn't find the specified column(" + + RETURN_STATUS_UNEXPECTED("Invalid column, BucketBatchByLength couldn't find the specified column(" + length_dependent_columns_[i] + ") in the dataset."); } int32_t column_index = map_item->second; @@ -118,7 +118,8 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T RETURN_IF_NOT_OK(output.at(0)->GetItemAt(out_element_length, {0})); if (*out_element_length < 0) { RETURN_STATUS_UNEXPECTED( - "Invalid parameter, element_length_function must return an integer greater than or equal to 0, but got" + + "Invalid element_length_function, element_length_function must return an integer greater than or equal to 0, " + "but got" + std::to_string(*out_element_length)); } } else { @@ -139,7 +140,8 @@ Status BucketBatchByLengthOp::PadAndBatchBucket(int32_t bucket_index, int32_t ba if (pad_shape[i] == TensorShape::kDimUnknown) { if (bucket_index + 1 >= bucket_boundaries_.size()) { std::string error_message = - "Invalid data, requested to pad to bucket boundary, element falls in last bucket."; + "Invalid data, requested to pad to bucket boundary failed, bucket index should be less than " + + std::to_string(bucket_boundaries_.size()) + ", but got " + std::to_string(bucket_index); return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, error_message); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc index 4ca06f21a20..940f8f04657 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc @@ -41,7 +41,8 @@ BuildSentencePieceVocabOp::BuildSentencePieceVocabOp(std::shared_ptrRegister(tree_->AllTasks())); RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask( @@ -58,7 +59,7 @@ Status 
BuildSentencePieceVocabOp::operator()() { RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); } RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); - CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no operator should be after from_dataset (repeat detected)"); + CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "'build_sentencepiece_vocab' does not support 'repeat'."); eoe_warning = true; } // add empty tensorRow for quit @@ -71,13 +72,13 @@ Status BuildSentencePieceVocabOp::SentenceThread() { TaskManager::FindMe()->Post(); if (col_names_.empty() == true) { auto itr = column_name_id_map_.find("text"); - CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), - "Invalid data, 'text' column does not exist in dataset."); + CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid data, 'text' column does not exist."); col_id_ = itr->second; } else { auto itr = column_name_id_map_.find(col_names_[0]); - CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), - "Invalid parameter, column name: " + col_names_[0] + " does not exist in dataset."); + CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid column, column name: " + col_names_[0] + " does not exist, check existing " + "columns with dataset API 'get_col_names'"); col_id_ = itr->second; } std::unique_ptr sentence_iter = std::make_unique(this); @@ -89,7 +90,7 @@ Status BuildSentencePieceVocabOp::SentenceThread() { } else { if (vocab_ == nullptr) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, - "Invalid parameter, SentencePiece vocab not set."); + "[Internal ERROR] SentencePiece vocab should not be null."); } vocab_->set_model_proto(model_proto); } @@ -131,7 +132,7 @@ bool BuildSentencePieceVocabOp::Done() { return read_done_; } void BuildSentencePieceVocabOp::Next(std::string *sentence) { if (sentence == nullptr) { - MS_LOG(ERROR) << "BuildSentencePieceVocab get nullptr element, please check data."; + MS_LOG(ERROR) << "[Internal ERROR] BuildSentencePieceVocab get nullptr element, please check data."; return; } TensorRow new_row; @@ -151,8 +152,8 @@ void BuildSentencePieceVocabOp::Next(std::string *sentence) { if (new_row[col_id_]->type().IsNumeric() || new_row[col_id_]->Rank() > 1) { ret_status_ = Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, - "Invalid data, build_sentence_piece_vocab only works on string data with rank equal to 1, got type: " + - new_row[col_id_]->type().ToString() + "and rank: " + std::to_string(new_row[col_id_]->Rank())); + "Invalid data, build_sentence_piece_vocab only supports string data with rank equal to 1, but got type: " + + new_row[col_id_]->type().ToString() + ", rank: " + std::to_string(new_row[col_id_]->Rank())); read_done_ = true; return; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.h index 76ac829c603..24f3575666b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.h @@ -69,7 +69,9 @@ class BuildSentencePieceVocabOp : public PipelineOp { Status operator()() override; - Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildSentencePieceVocabOp"); } + Status Reset() override { + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Reset shouldn't be called in BuildSentencePieceVocabOp."); + } std::string Name() const override { 
return kBuildSentencePieceVocabOp; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc index 66bdc5eb079..126453b1877 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc @@ -54,7 +54,8 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) { while (!new_row.empty()) { for (int32_t col : col_ids_) { CHECK_FAIL_RETURN_UNEXPECTED(!new_row[col]->type().IsNumeric(), - "Invalid data, build_vocab only works on string data, but got numeric data type: " + + "Invalid datatype, 'build_vocab' only supports input of string type, but got " + "numeric type: " + new_row[col]->type().ToString()); for (auto itr = new_row[col]->begin(); itr != new_row[col]->end(); ++itr) { (*wrkr_map)[std::string(*itr)] += 1; @@ -79,7 +80,8 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) { Status BuildVocabOp::operator()() { // launch the collector thread if (tree_ == nullptr) { - return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set."); + return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, + "[Internal ERROR] Pipeline init failed, Execution tree not set."); } RETURN_IF_NOT_OK(distributor_queue_->Register(tree_->AllTasks())); RETURN_IF_NOT_OK(collector_queue_->Register(tree_->AllTasks())); @@ -96,8 +98,9 @@ Status BuildVocabOp::operator()() { col_ids_.reserve(col_names_.size()); for (std::string col : col_names_) { auto itr = column_name_id_map_.find(col); - CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), - "Invalid parameter, column name: " + col + " does not exist in dataset."); + CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid column name, column name: " + col + " does not exist, check existing columns " + "with dataset API 'get_col_names'"); col_ids_.push_back(itr->second); } } else { @@ -113,7 +116,8 @@ Status BuildVocabOp::operator()() { RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); } RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); - CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no operator should be after from_dataset (repeat detected)"); + CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, + "Invalid repeat operator, BuildVocab does not support the 'repeat' operator."); eoe_warning = true; } @@ -137,7 +141,8 @@ Status BuildVocabOp::CollectorThread() { ++num_quited_worker; } } // all frequencies are obtained - CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), "Invalid data, there are no words in the dataset."); + CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), + "Invalid data, BuildVocab found no words in the dataset, check the input data."); std::vector words; // make sure enough is reserved, this will become a partially sorted list eventually words.reserve(wrkr_map->size()); @@ -158,7 +163,7 @@ Status BuildVocabOp::CollectorThread() { } CHECK_FAIL_RETURN_UNEXPECTED(err_msg.empty(), - "Invalid data, these special words are already in the dataset: " + err_msg + "."); + "Invalid special words, these special words are already in the vocab: " + err_msg + "."); int64_t num_words = std::min(static_cast(words.size()), top_k_); if (num_words == 0) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h index e0e40046079..53b13f47b82 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.h @@ -66,7 +66,7 @@ class BuildVocabOp : public ParallelOp { Status operator()() override; - Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildVocabOp"); } + Status Reset() override { RETURN_STATUS_UNEXPECTED("[Internal ERROR] Reset shouldn't be called in BuildVocabOp"); } private: const int32_t interval_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc index db5076c6790..e9b741cc758 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc @@ -191,7 +191,7 @@ Status CacheBase::FetchFromCache(int32_t worker_id) { if (AllowCacheMiss()) { ++num_cache_miss_; } else { - std::string errMsg = "Row id " + std::to_string(row_id) + " not found."; + std::string errMsg = "[Internal ERROR] Row id " + std::to_string(row_id) + " not found."; RETURN_STATUS_UNEXPECTED(errMsg); } } @@ -225,7 +225,8 @@ Status CacheBase::UpdateColumnMapFromCache() { Status CacheBase::GetPrefetchRow(row_id_type row_id, TensorRow *out) { RETURN_UNEXPECTED_IF_NULL(out); - CHECK_FAIL_RETURN_UNEXPECTED(row_id >= 0, "Expect positive row id, but got:" + std::to_string(row_id)); + CHECK_FAIL_RETURN_UNEXPECTED(row_id >= 0, + "[Internal ERROR] Expect positive row id, but got:" + std::to_string(row_id)); RETURN_IF_NOT_OK(prefetch_.PopFront(row_id, out)); return Status::OK(); } @@ -278,7 +279,7 @@ Status CacheBase::Prefetcher(int32_t worker_id) { cache_miss.clear(); std::unique_ptr blk; RETURN_IF_NOT_OK(prefetch_queues_[worker_id]->PopFront(&blk)); - CHECK_FAIL_RETURN_UNEXPECTED(!blk->eof(), "Expect eoe or a regular io block."); + CHECK_FAIL_RETURN_UNEXPECTED(!blk->eof(), "[Internal ERROR] Expect eoe or a regular io block."); if (!blk->eoe()) { RETURN_IF_NOT_OK(blk->GetKeys(&prefetch_keys)); Status rc; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc index db519c78707..a82925e4dbf 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc @@ -29,7 +29,7 @@ namespace dataset { Status CacheLookupOp::operator()() { if (!sampler_) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, - "Invalid parameter, CacheLookupOp requires a sampler before it can be executed, but got nullptr."); + "Invalid sampler, Cache requires a sampler before it can be executed, but got nullptr."); } RETURN_IF_NOT_OK(RegisterResources()); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc index 9711af21079..9a9de9c342a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_merge_op.cc @@ -147,7 +147,8 @@ Status CacheMergeOp::CacheMissWorkerEntry(int32_t workerId) { } else { row_id_type row_id = new_row.getId(); if (row_id < 0) { - std::string errMsg = "Expect positive row id, but got: " + std::to_string(row_id); + std::string errMsg = + "[Internal ERROR] row id should be greater than or equal to 0, but got: " + std::to_string(row_id); RETURN_STATUS_UNEXPECTED(errMsg); } if 
(cache_missing_rows_) { @@ -213,7 +214,8 @@ Status CacheMergeOp::PrepareOperator() { // Run any common code from super clas // specific logic CHECK_FAIL_RETURN_UNEXPECTED( child_.size() == kNumChildren, - "Incorrect number of children of CacheMergeOp, required num is 2, but got:" + std::to_string(child_.size())); + "[Internal ERROR] Incorrect number of children of CacheMergeOp, required num is 2, but got:" + + std::to_string(child_.size())); RETURN_IF_NOT_OK(DatasetOp::PrepareOperator()); // Get the computed check sum from all ops in the cache miss class uint32_t cache_crc = DatasetOp::GenerateCRC(child_[kCacheMissChildIdx]); @@ -231,7 +233,7 @@ Status CacheMergeOp::PrepareOperator() { // Run any common code from super clas } Status CacheMergeOp::ComputeColMap() { - CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "Invalid data, cache miss stream is empty."); + CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "[Internal ERROR] cache miss stream is empty."); if (column_name_id_map().empty()) { column_name_id_map_ = child_[kCacheMissChildIdx]->column_name_id_map(); } @@ -270,7 +272,7 @@ Status CacheMergeOp::GetRq(row_id_type row_id, CacheMergeOp::TensorRowCacheReque RETURN_IF_NOT_OK(mem.allocate(1)); *out = mem.GetMutablePointer(); } else { - RETURN_STATUS_UNEXPECTED("Invalid data, map insert fail."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] map insert failed."); } } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc index d16d1cfb65e..44b110ae5f0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc @@ -43,7 +43,7 @@ Status CacheOp::operator()() { RETURN_UNEXPECTED_IF_NULL(tree_); if (!sampler_) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, - "Invalid parameter, CacheOp requires a sampler before it can be executed, but got nullptr."); + "Invalid sampler, CacheOp requires a sampler before it can be executed, but got nullptr."); } RETURN_IF_NOT_OK(RegisterResources()); @@ -145,9 +145,9 @@ Status CacheOp::WaitForCachingAllRows() { BuildPhaseDone = true; break; case CacheServiceState::kOutOfMemory: - return Status(StatusCode::kMDOutOfMemory, "Cache server is running out of memory"); + return Status(StatusCode::kMDOutOfMemory, "Cache server is running out of memory, check memory usage."); case CacheServiceState::kNoSpace: - return Status(StatusCode::kMDNoSpace, "Cache server is running of out spill storage"); + return Status(StatusCode::kMDNoSpace, "Cache server is running out of spill storage, check memory usage."); case CacheServiceState::kNone: case CacheServiceState::kError: default: diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc index 6432f4bef38..cf09d89c050 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/concat_op.cc @@ -74,9 +74,17 @@ Status ConcatOp::Verify(int32_t id, const TensorRow &new_row) { // Compare the data type and data rank with these in child[0] int32_t index = 0; for (auto item : new_row) { - if ((item->type() != data_type_[index]) || item->Rank() != data_rank_[index++]) { - RETURN_STATUS_UNEXPECTED("Invalid data, data type or data rank is not the same with previous dataset."); + if (item->type() != data_type_[index]) { 
RETURN_STATUS_UNEXPECTED( + "Invalid datatype, the data type of the two concatenated datasets should be the same, but got " + item->type().ToString() + " and " + data_type_[index].ToString() + "."); } + if (item->Rank() != data_rank_[index]) { + RETURN_STATUS_UNEXPECTED( + "Invalid rank, the data rank of the two concatenated datasets should be the same, but got " + + std::to_string(item->Rank()) + " and " + std::to_string(data_rank_[index]) + "."); + } + index++; } } verified_ = true; @@ -89,12 +97,13 @@ Status ConcatOp::ComputeColMap() { // Obtain columns_name_id_map from child_[0] column_name_id_map_ = child_[0]->column_name_id_map(); if (column_name_id_map_.empty()) { - RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!"); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Child column name map cannot be empty!"); } // Verify all children have the same column name map for (size_t i = 0; i < child_.size(); ++i) { if (child_[i]->column_name_id_map() != column_name_id_map_) { - RETURN_STATUS_UNEXPECTED("Invalid data, column name or column order is not the same with previous dataset."); + RETURN_STATUS_UNEXPECTED( + "Invalid columns, 'column name' or 'column order' of concat datasets should be the same."); } } } else { @@ -118,7 +127,7 @@ Status ConcatOp::GetNumClasses(int64_t *num_classes) { *num_classes = max_num_classes; return Status::OK(); } -Status ConcatOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); } +Status ConcatOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ConcatOp is an inlined operator."); } bool ConcatOp::IgnoreSample() { bool is_not_mappable_or_second_ne_zero = true; @@ -184,10 +193,10 @@ Status ConcatOp::GetNextRow(TensorRow *row) { return Status::OK(); } if (row->eof()) { - CHECK_FAIL_RETURN_UNEXPECTED(cur_child_ == 0, "Received an unexpected EOF."); + CHECK_FAIL_RETURN_UNEXPECTED(cur_child_ == 0, "[Internal ERROR] Received an unexpected EOF."); for (int32_t i = cur_child_ + 1; i < child_.size(); i++) { RETURN_IF_NOT_OK(child_[i]->GetNextRow(row)); - CHECK_FAIL_RETURN_UNEXPECTED(row->eof(), "Row must be an EOF."); + CHECK_FAIL_RETURN_UNEXPECTED(row->eof(), "[Internal ERROR] Row must be an EOF."); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc index eae6b081166..1837105817c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc @@ -63,7 +63,7 @@ Status DatasetOp::AddChild(std::shared_ptr child) { } if (operator_id_ == kInvalidOperatorId) { std::string err_msg( - "Cannot add child node. Tree node connections can only " + "[Internal ERROR] Cannot add child node. Tree node connections can only " "be made if the node belongs to a tree."); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -71,7 +71,7 @@ Status DatasetOp::AddChild(std::shared_ptr child) { // disallow relationships with other trees if (tree_ != child->tree_) { std::string err_msg( - "Cannot add child node. 
Tree node connections can only be made if both nodes belong to the same tree."); + "Invalid operator structure, the relationship of operators should be one by one, but got too many branches."); RETURN_STATUS_UNEXPECTED(err_msg); } child_.push_back(child); @@ -82,7 +82,7 @@ Status DatasetOp::AddChild(std::shared_ptr child) { Status DatasetOp::RemoveChild(std::shared_ptr child) { if (operator_id_ == kInvalidOperatorId) { std::string err_msg( - "Cannot remove child node. Tree node connections can only " + "[Internal ERROR] Cannot remove child node. Tree node connections can only " "be made if the node belongs to a tree."); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -90,7 +90,7 @@ Status DatasetOp::RemoveChild(std::shared_ptr child) { // disallow relationships with other trees if (tree_ != child->tree_) { std::string err_msg( - "Cannot remove child node. Tree node connections can only be made if both nodes belong to the same tree."); + "Invalid operator structure, the relationship of operators should be one by one, but got too many branches."); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -132,11 +132,15 @@ void DatasetOp::RemoveParent(const DatasetOp *parent) { // Removes this node from the tree and connects it's parent/child together Status DatasetOp::Remove() { if (parent_.size() > 1) { - std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one."); + std::string err_msg( + "Invalid operator structure, the relationship between operators should be one-to-one, but encountered more than " + "one parent, namely: " + + std::to_string(parent_.size())); RETURN_STATUS_UNEXPECTED(err_msg); } if (child_.size() > 1) { - std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one."); + std::string err_msg( + "Invalid operator structure, the relationship of operators should be one by one, but got too many branches."); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -151,7 +155,8 @@ Status DatasetOp::Remove() { // If we have a parent, then assign child's parent to point to our parent. if (!parent_.empty()) { CHECK_FAIL_RETURN_UNEXPECTED(parent_[0]->Children().size() == 1, - "Removing a node whose parent has more than 1 child is not supported."); + "Invalid operator structure, the relationship of operators should be one by one, " + "but got too many branches."); child_[0]->parent_[0] = parent_[0]; } else { // We don't have a parent, so we are the root node being removed. @@ -293,7 +298,8 @@ Status DatasetOp::GetClassIndexing(std::vectorGetClassIndexing(output_class_indexing); } else { *output_class_indexing = {}; - RETURN_STATUS_UNEXPECTED("Trying to get class index from leaf node, missing override."); + RETURN_STATUS_UNEXPECTED("Unsupported scenario, GetClassIndexing failed for " + Name() + + " doesn't support GetClassIndexing yet."); } } @@ -343,12 +349,14 @@ std::string DatasetOp::ColumnNameMapAsString() const { // Operations changing the column map must overwrite this function. 
Status DatasetOp::ComputeColMap() { if (child_.size() > 1) { - RETURN_STATUS_UNEXPECTED("[Internal ERROR], no support for the relationship between operators is not one-to-one."); + RETURN_STATUS_UNEXPECTED( + "Invalid operator structure, the relationship of operators should be one by one, but got too many branches."); } if (column_name_id_map_.empty()) { column_name_id_map_ = child_[0]->column_name_id_map(); if (column_name_id_map_.empty()) { - RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!"); + RETURN_STATUS_UNEXPECTED("Invalid column list, the column list of " + child_[0]->Name() + + " should have one column at least, but got empty."); } MS_LOG(DEBUG) << "Setting column map:\n" << DatasetOp::ColumnNameMapAsString(); } else { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc index b43705a4789..ca2424f4c24 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc @@ -119,8 +119,8 @@ Status DeviceQueueOp::FilterMetadata(TensorRow *row) { Status DeviceQueueOp::CheckExceptions(const TensorRow &row) const { // this method checks if the row meets the conditions to be sent to TDT for (const auto &item : row) { - CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid data, cannot send string tensor to device."); - CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, cannot send tensor with no data to device."); + CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid datatype, cannot send string data to device."); + CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, the data send to device is null."); } return Status::OK(); } @@ -152,7 +152,8 @@ Status DeviceQueueOp::operator()() { } } if (tdtInstancePtr->acl_handle_ == nullptr) { - RETURN_STATUS_UNEXPECTED("Create channel for sending data failed, please check DEVICE ID setting."); + RETURN_STATUS_UNEXPECTED( + "[Internal ERROR] Create channel for sending data failed, please check DEVICE ID setting."); } RETURN_IF_NOT_OK(SendDataToAscend()); #endif @@ -343,7 +344,8 @@ Status DeviceQueueOp::SendRowToTdt(TensorRow curr_row, bool is_profiling_enable, #ifdef ENABLE_TDTQUE Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) { if (!create_data_info_queue_) { - return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "DataInfo queue is not created."); + return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, + "[Internal ERROR] DataInfo queue is not created."); } // This place has a race condition with operator(), so the first one // arrive here will do the initialize work. 
@@ -359,7 +361,7 @@ Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) { } #else Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) { - return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "GetDataInfo is not supported yet."); + return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "'GetDataInfo' only supported on Ascend."); } #endif @@ -446,7 +448,7 @@ Status DeviceQueueOp::PushDataToGPU() { if (!ps::PsDataPrefetch::GetInstance().PrefetchData(channel_name_, items[0].data_ptr_, items[0].data_len_, items[0].data_type_)) { return Status(StatusCode::kMDTimeOut, __LINE__, __FILE__, - "Failed to prefetch data in current PS mode(cache data when sending)."); + "[Internal ERROR] Failed to prefetch data in current PS mode(cache data when sending)."); } RETURN_IF_NOT_OK(RetryPushData(handle, items)); #ifndef ENABLE_SECURITY @@ -623,18 +625,19 @@ Status DeviceQueueOp::MallocForGPUData(std::vector *items, for (auto &sub_item : *items) { auto rc = pool_[worker_id]->Allocate(sub_item.data_len_, &sub_item.data_ptr_); if (rc.IsError() || sub_item.data_ptr_ == nullptr) { - return Status(StatusCode::kMDOutOfMemory, __LINE__, __FILE__, "Memory malloc failed."); + return Status(StatusCode::kMDOutOfMemory, __LINE__, __FILE__, "Memory malloc failed, check memory usage."); } if (curr_row[i] == nullptr) { - MS_LOG(ERROR) << "The pointer curr_row[" << i << "] is null"; - return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "TensorRow 'curr_row' contains nullptr."); + MS_LOG(ERROR) << "[Internal ERROR] The pointer curr_row[" << i << "] is null"; + return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, + "[Internal ERROR] TensorRow 'curr_row' contains nullptr."); } sub_item.data_type_ = curr_row[i]->type().ToString(); const unsigned char *column_data = curr_row[i]->GetBuffer(); if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data, static_cast(curr_row[i++]->SizeInBytes())) != 0) { - MS_LOG(ERROR) << "memcpy_s failed!"; - return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "memcpy failed when using memcpy_s do copy."); + MS_LOG(ERROR) << "[Internal ERROR] memcpy_s failed."; + return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "[Internal ERROR] memcpy_s failed."); } } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/epoch_ctrl_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/epoch_ctrl_op.cc index d6c49f6ff61..971f3f8ee4d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/epoch_ctrl_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/epoch_ctrl_op.cc @@ -43,7 +43,7 @@ void EpochCtrlOp::Print(std::ostream &out, bool show_all) const { Status EpochCtrlOp::GetNextRow(TensorRow *row) { RETURN_UNEXPECTED_IF_NULL(row); if (child_.empty()) { - RETURN_STATUS_UNEXPECTED("EpochCtrlOp can't be the leaf node(first operator) of pipeline."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] EpochCtrlOp can't be the leaf node(first operator) of pipeline."); } // `retry_if_eoe` is false because EpochCtrlOp does not eat EOE. 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc index e8025540d65..ea9505aad23 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc @@ -143,7 +143,7 @@ Status FilterOp::WorkerCompute(const TensorRow &in_row, bool *out_predicate) { Status FilterOp::CheckInput(const TensorRow &input) const { for (auto &item : input) { if (item == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid data, input tensor is null."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] input tensor is null."); } } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc index 7195cb26ee5..1292e261602 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/map_op.cc @@ -192,7 +192,7 @@ Status MapOp::WorkerEntry(int32_t worker_id) { } RETURN_IF_NOT_OK(worker_out_queues_[worker_id]->EmplaceBack(std::move(in_row))); } else { - CHECK_FAIL_RETURN_UNEXPECTED(in_row.size() != 0, "MapOp got an empty TensorRow."); + CHECK_FAIL_RETURN_UNEXPECTED(in_row.size() != 0, "[Internal ERROR] MapOp got an empty TensorRow."); TensorRow out_row; // Perform the compute function of TensorOp(s) and store the result in new_tensor_table. RETURN_IF_NOT_OK(WorkerCompute(in_row, &out_row, job_list)); @@ -244,7 +244,11 @@ Status MapOp::WorkerCompute(const TensorRow &in_row, TensorRow *out_row, // Sanity check a row in result_table if (!result_table.empty() && out_columns_.size() != result_table[0].size()) { - RETURN_STATUS_UNEXPECTED("Result of a tensorOp doesn't match output column names"); + RETURN_STATUS_UNEXPECTED( + "Invalid columns, the number of columns returned in 'map' operations should match " + "the number of 'output_columns', but got the number of columns returned in 'map' operations: " + + std::to_string(result_table[0].size()) + + ", the number of 'output_columns': " + std::to_string(out_columns_.size()) + "."); } // Merging the data processed by job (result_table) with the data that are not used. @@ -299,7 +303,8 @@ Status MapOp::InitPrivateVariable(std::unordered_map *col_ if (in_columns_.empty()) { auto itr = std::find_if(col_name_id_map->begin(), col_name_id_map->end(), [](const auto &it) { return it.second == 0; }); - CHECK_FAIL_RETURN_UNEXPECTED(itr != col_name_id_map->end(), "Column name id map doesn't have id 0"); + CHECK_FAIL_RETURN_UNEXPECTED(itr != col_name_id_map->end(), + "[Internal ERROR] Column name id map doesn't have id 0"); MS_LOG(INFO) << "Input columns empty for map op, will apply to the first column in the current table."; in_columns_.push_back(itr->first); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc index 9f44adb0374..52d1f574b55 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/project_op.cc @@ -74,7 +74,7 @@ TensorRow ProjectOp::Project(const TensorRow &row) { // However, the ProjectOp is defined as a inlined operator, so it is invalid to launch the // functor since this op runs inlined inside another operator. The function is overloaded to // ensure that it is not called by mistake (it will generate an error). 
-Status ProjectOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. ProjectOp is an inlined operator."); } +Status ProjectOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ProjectOp is an inlined operator."); } Status ProjectOp::EoeReceived(int32_t worker_id) { state_ = OpState::kDeOpIdle; @@ -92,7 +92,7 @@ Status ProjectOp::ComputeColMap() { for (size_t i = 0; i < columns_to_project_.size(); i++) { std::string ¤t_column = columns_to_project_[i]; if (child_column_name_mapping.find(current_column) == child_column_name_mapping.end()) { - std::string err_msg = "Invalid parameter, column name: " + current_column + " does not exist in dataset."; + std::string err_msg = "Invalid column, column name: " + current_column + " does not exist."; RETURN_STATUS_UNEXPECTED(err_msg); } // Setup the new column name mapping for ourself (base class field) diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc index e2558510655..89af31bb069 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc @@ -41,7 +41,7 @@ Status RenameOp::GetNextRow(TensorRow *row) { return Status::OK(); } -Status RenameOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. RenameOp is an inlined operator."); } +Status RenameOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] RenameOp is an inlined operator."); } // Rename core functionality to compute the new column name id map. // We need to overwrite the super class ComputeColMap here because we're making a modification of the @@ -71,7 +71,7 @@ Status RenameOp::ComputeColMap() { MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << "."; if (new_col_name.find(out_columns_[index]) != new_col_name.end()) { std::string err_msg( - "Invalid parameter, rename operation does not support rename one column name into another already exist " + "Invalid column, rename operation does not support rename one column name into another already exist " "column name, existing column name is: " + out_columns_[index] + "."); RETURN_STATUS_UNEXPECTED(err_msg); @@ -82,7 +82,7 @@ Status RenameOp::ComputeColMap() { // not found if (new_col_name.find(name) != new_col_name.end()) { std::string err_msg( - "Invalid parameter, rename operation does not support rename one column name into another already exist " + "Invalid column, rename operation does not support rename one column name into another already exist " "column name, existing column name is: " + name + "."); RETURN_STATUS_UNEXPECTED(err_msg); @@ -95,7 +95,7 @@ Status RenameOp::ComputeColMap() { // only checks number of renamed columns have been found, this input check doesn't check everything if (found != in_columns_.size()) { MS_LOG(DEBUG) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << "."; - std::string err_msg = "Invalid parameter, column to be renamed does not exist in dataset."; + std::string err_msg = "Invalid column, column to be renamed does not exist."; RETURN_STATUS_UNEXPECTED(err_msg); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc index 3177325bf63..8e02389e814 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/repeat_op.cc @@ -60,7 +60,7 @@ void RepeatOp::Print(std::ostream &out, bool 
show_all) const { Status RepeatOp::GetNextRow(TensorRow *row) { RETURN_UNEXPECTED_IF_NULL(row); if (child_.empty()) { - RETURN_STATUS_UNEXPECTED("Pipeline init failed, RepeatOp can't be the first op in pipeline."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Pipeline init failed, RepeatOp can't be the first op in pipeline."); } RETURN_IF_NOT_OK(child_[0]->GetNextRow(row)); @@ -108,7 +108,7 @@ Status RepeatOp::EoeReceived(int32_t worker_id) { // However, the RepeatOp is defined as a inlined operator, so it is invalid to launch the // functor since this op runs inlined inside another operator. The function is overloaded to // ensure that it is not called by mistake (it will generate an error). -Status RepeatOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. RepeatOp is an inlined operator."); } +Status RepeatOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] RepeatOp is an inlined operator."); } // Base-class override for handling cases when an eof is received. Status RepeatOp::EofReceived(int32_t worker_id) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc index ef47844ce27..3912a6178aa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc @@ -205,7 +205,8 @@ Status ShuffleOp::InitShuffleBuffer() { // rows. if (shuffle_buffer_state_ != kShuffleStateInit) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, - "Invalid shuffle buffer state, shuffle buffer should be init first or reset after each epoch."); + "[Internal ERROR] Invalid shuffle buffer state, shuffle buffer should be init first or reset " + "after each epoch."); } // Before we drop into the fetching loop, call the fetch once for the first time @@ -220,7 +221,7 @@ Status ShuffleOp::InitShuffleBuffer() { } if (new_row.empty()) { - RETURN_STATUS_UNEXPECTED("Invalid data, unable to fetch a single row for shuffle buffer."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Unable to fetch a single row for shuffle buffer."); } // Now fill the rest of the shuffle buffer until we are unable to get the next row or we reached diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc index c7d1cb648b9..295089b018c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/skip_op.cc @@ -43,7 +43,7 @@ void SkipOp::Print(std::ostream &out, bool show_all) const { } } -Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. 
SkipOp is an inlined operator."); } +Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] SkipOp is an inlined operator."); } Status SkipOp::GetNextRow(TensorRow *row) { RETURN_UNEXPECTED_IF_NULL(row); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc index 84c0cbfc52b..aad8f15fa43 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc @@ -64,7 +64,7 @@ Status AlbumOp::PrepareData() { dirname_offset_ = folder_path_.length(); std::shared_ptr dirItr = Path::DirIterator::OpenDirectory(&folder); if (!folder.Exists() || dirItr == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_ + "."); + RETURN_STATUS_UNEXPECTED("Invalid folder, " + folder_path_ + " does not exist or permission denied."); } MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << "."; @@ -94,7 +94,7 @@ Status AlbumOp::PrepareData() { // This function does not return status because we want to just skip bad input, not crash bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { if (valid == nullptr) { - MS_LOG(ERROR) << "Album parameter can't be nullptr."; + MS_LOG(ERROR) << "[Internal ERROR] Album parameter can't be nullptr."; return false; } std::ifstream file_handle; @@ -214,8 +214,8 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); } else { - RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " + - data_schema_->Column(col_num).Type().ToString()); + RETURN_STATUS_UNEXPECTED("Invalid column type, column type of " + data_schema_->Column(col_num).Name() + + " should be int32 or int64, but got " + data_schema_->Column(col_num).Type().ToString()); } row->push_back(std::move(label)); return Status::OK(); @@ -243,7 +243,8 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); } else { - RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " + + RETURN_STATUS_UNEXPECTED("Invalid column type, column type of " + data_schema_->Column(col_num).Name() + + " should be float32 or float64, but got " + data_schema_->Column(col_num).Type().ToString()); } row->push_back(std::move(float_array)); @@ -323,7 +324,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { std::ifstream file_handle(folder_path_ + file); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file); + RETURN_STATUS_UNEXPECTED("Invalid json file, " + folder_path_ + file + " does not exist or permission denied."); } std::string line; while (getline(file_handle, line)) { @@ -342,7 +343,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { } } catch (const std::exception &err) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file); + RETURN_STATUS_UNEXPECTED("Invalid file, " + folder_path_ + file + " load failed: " + std::string(err.what())); } } file_handle.close(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc index 
ea94851af69..5e9affec769 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc @@ -60,16 +60,16 @@ Status CelebAOp::ParseAttrFile() { auto realpath = FileUtils::GetRealPath((folder_path / "list_attr_celeba.txt").ToString().data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << (folder_path / "list_attr_celeba.txt").ToString(); - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + - (folder_path / "list_attr_celeba.txt").ToString()); + MS_LOG(ERROR) << "Invalid file path, " << (folder_path / "list_attr_celeba.txt").ToString() << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + (folder_path / "list_attr_celeba.txt").ToString() + + " does not exist."); } std::ifstream attr_file(realpath.value()); if (!attr_file.is_open()) { std::string attr_file_name = (folder_path / "list_attr_celeba.txt").ToString(); return Status(StatusCode::kMDFileNotExist, __LINE__, __FILE__, - "Invalid file, failed to open Celeba attr file: " + attr_file_name); + "Invalid attr file, failed to open: " + attr_file_name + ", permission denied."); } attr_file_ = (folder_path / "list_attr_celeba.txt").ToString(); @@ -89,12 +89,11 @@ Status CelebAOp::ParseAttrFile() { try { num_rows_in_attr_file_ = static_cast(std::stoul(rows_num)); // First line is rows number in attr file } catch (std::invalid_argument &e) { - RETURN_STATUS_UNEXPECTED( - "Invalid data, failed to convert rows_num from attr_file to unsigned long, invalid value: " + rows_num + "."); + RETURN_STATUS_UNEXPECTED("Invalid rows_num, failed to convert rows_num: " + rows_num + " to unsigned long in " + + attr_file_ + "."); } catch (std::out_of_range &e) { - RETURN_STATUS_UNEXPECTED( - "Invalid data, failed to convert rows_num from attr_file to unsigned long, value out of range: " + rows_num + - "."); + RETURN_STATUS_UNEXPECTED("Invalid rows_num, rows_num in " + attr_file_ + " is out of range, rows_num is " + + rows_num + "."); } (void)getline(attr_file, attr_name); // Second line is attribute name,ignore it @@ -125,8 +124,8 @@ bool CelebAOp::CheckDatasetTypeValid() { Path folder_path(folder_path_); partition_file_.open((folder_path / "list_eval_partition.txt").ToString()); if (!partition_file_.is_open()) { - MS_LOG(ERROR) << "Invalid file, fail to open CelebA partition file, path=" - << (folder_path / "list_eval_partition.txt").ToString(); + MS_LOG(ERROR) << "Invalid eval partition file, failed to open eval partition file: " + << (folder_path / "list_eval_partition.txt").ToString() << " does not exist or permission denied."; return false; } } @@ -140,10 +139,12 @@ bool CelebAOp::CheckDatasetTypeValid() { try { type = std::stoi(vec[1]); } catch (std::invalid_argument &e) { - MS_LOG(WARNING) << "Invalid data, failed to convert to int, invalid value: " << vec[1] << "."; + MS_LOG(WARNING) << "Invalid number, the second word in list_eval_partition.txt should be numeric, but got: " + << vec[1] << "."; return false; } catch (std::out_of_range &e) { - MS_LOG(WARNING) << "Invalid data, failed to convert to int, value out of range: " << vec[1] << "."; + MS_LOG(WARNING) << "Invalid number, the second word in list_eval_partition.txt is out of range, word is: " << vec[1] + << "."; return false; } // train:0, valid=1, test=2 @@ -185,12 +186,11 @@ Status CelebAOp::PrepareData() { try { value = std::stoi(split[label_index]); } catch (std::invalid_argument &e) { - 
RETURN_STATUS_UNEXPECTED("Invalid data, failed to convert item from attr_file to int, corresponding value: " + - split[label_index] + "."); + RETURN_STATUS_UNEXPECTED("Invalid label index, the label index in " + file_path.ToString() + + " should be numeric, but got: " + split[label_index] + "."); } catch (std::out_of_range &e) { - RETURN_STATUS_UNEXPECTED( - "Invalid data, failed to convert item from attr_file to int as out of range, corresponding value: " + - split[label_index] + "."); + RETURN_STATUS_UNEXPECTED("Invalid label index, the label index in " + file_path.ToString() + + " is out of range, index is " + split[label_index] + "."); } image_labels.second.push_back(value); } @@ -242,7 +242,8 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { Status rc = Decode(image, &image); if (rc.IsError()) { image = nullptr; - std::string err_msg = "Invalid data, failed to decode image: " + image_path.ToString(); + std::string err_msg = + "Invalid image, " + image_path.ToString() + " decode failed, the image is broken or permission denied."; return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); } } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc index 3f1338bd4f4..7006803dd3a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc @@ -113,7 +113,7 @@ Status CifarOp::ReadCifar10BlockData() { // check the validity of the file path Path file_path(file); CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), - "Invalid file, failed to find cifar10 file: " + file); + "Invalid cifar10 file, " + file + " does not exist or is a directory."); std::string file_name = file_path.Basename(); if (usage_ == "train") { @@ -125,12 +125,12 @@ Status CifarOp::ReadCifar10BlockData() { } std::ifstream in(file, std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file + - ", make sure file not damaged or permission denied."); + CHECK_FAIL_RETURN_UNEXPECTED( + in.is_open(), "Invalid cifar10 file, failed to open " + file + ", the file is damaged or permission denied."); for (uint32_t index = 0; index < num_cifar10_records / kCifarBlockImageNum; ++index) { (void)in.read(reinterpret_cast(&(image_data[0])), block_size * sizeof(unsigned char)); - CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar10 file: " + file + + CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid cifar10 file, failed to read data from: " + file + ", re-download dataset(make sure it is CIFAR-10 binary version)."); (void)cifar_raw_data_block_->EmplaceBack(image_data); // Add file path info @@ -155,7 +155,7 @@ Status CifarOp::ReadCifar100BlockData() { // check the validity of the file path Path file_path(file); CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), - "Invalid file, failed to find cifar100 file: " + file); + "Invalid cifar100 file, " + file + " does not exist or is a directory."); std::string file_name = file_path.Basename(); // if usage is train/test, get only these 2 files @@ -167,16 +167,16 @@ Status CifarOp::ReadCifar100BlockData() { } else if (file_name.find("train") != std::string::npos) { num_cifar100_records = num_cifar100_train_records; } else { - RETURN_STATUS_UNEXPECTED("Invalid file, Cifar100 train/test file not found in: " + file_name); + 
RETURN_STATUS_UNEXPECTED("Invalid cifar100 file, Cifar100 train/test file is missing in: " + file_name); } std::ifstream in(file, std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file + - ", make sure file not damaged or permission denied."); + CHECK_FAIL_RETURN_UNEXPECTED( + in.is_open(), "Invalid cifar100 file, failed to open " + file + ", the file is damaged or permission denied."); for (uint32_t index = 0; index < num_cifar100_records / kCifarBlockImageNum; index++) { (void)in.read(reinterpret_cast(&(image_data[0])), block_size * sizeof(unsigned char)); - CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar100 file: " + file + + CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid cifar100 file, failed to read data from: " + file + ", re-download dataset(make sure it is CIFAR-100 binary version)."); (void)cifar_raw_data_block_->EmplaceBack(image_data); // Add file path info @@ -200,10 +200,10 @@ Status CifarOp::GetCifarFiles() { } } } else { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open directory: " + dir_path.ToString() + - ", make sure file not damaged or permission denied."); + RETURN_STATUS_UNEXPECTED("Invalid directory, " + dir_path.ToString() + " is not a directory or permission denied."); } - CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(), "Invalid file, no .bin files found under " + folder_path_); + CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(), + "Invalid cifar folder, cifar(.bin) files are missing under " + folder_path_); std::sort(cifar_files_.begin(), cifar_files_.end()); return Status::OK(); } @@ -306,9 +306,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, constexpr int64_t num_cifar10_records = 10000; for (auto &file : op->cifar_files_) { Path file_path(file); - CHECK_FAIL_RETURN_UNEXPECTED( - file_path.Exists() && !file_path.IsDirectory(), - "Invalid file, failed to open cifar10 file: " + file + ", make sure file not damaged or permission denied."); + CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), + "Invalid cifar10 file, " + file + " does not exist or is a directory."); std::string file_name = file_path.Basename(); if (op->usage_ == "train") { @@ -321,8 +320,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, std::ifstream in(file, std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file + - ", make sure file not damaged or permission denied."); + CHECK_FAIL_RETURN_UNEXPECTED( + in.is_open(), "Invalid cifar10 file, failed to open " + file + ", the file is damaged or permission denied."); *count = *count + num_cifar10_records; } return Status::OK(); @@ -334,9 +333,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, Path file_path(file); std::string file_name = file_path.Basename(); - CHECK_FAIL_RETURN_UNEXPECTED( - file_path.Exists() && !file_path.IsDirectory(), - "Invalid file, failed to find cifar100 file: " + file + ", make sure file not damaged or permission denied."); + CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), + "Invalid cifar100 file, " + file + " does not exist or is a directory."); if (op->usage_ == "train" && file_path.Basename().find("train") == std::string::npos) continue; if (op->usage_ == "test" && file_path.Basename().find("test") == std::string::npos) continue; @@ -347,8 +345,8 @@ Status CifarOp::CountTotalRows(const 
std::string &dir, const std::string &usage, num_cifar100_records += kCifar100RecordsPerTrainFile; } std::ifstream in(file, std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file + - ", make sure file not damaged or permission denied."); + CHECK_FAIL_RETURN_UNEXPECTED( + in.is_open(), "Invalid cifar100 file, failed to open " + file + ", the file is damaged or permission denied."); } *count = num_cifar100_records; return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cityscapes_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cityscapes_op.cc index 4f921e9dd0e..a060401390c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cityscapes_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cityscapes_op.cc @@ -56,7 +56,8 @@ Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { } else { std::ifstream file_handle(data.second); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + data.second); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + data.second + + ", the json is damaged or permission denied."); } std::string contents((std::istreambuf_iterator(file_handle)), std::istreambuf_iterator()); nlohmann::json contents_js = nlohmann::json::parse(contents); @@ -71,13 +72,15 @@ Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { if (decode_ == true) { Status rc = Decode(image, &image); if (rc.IsError()) { - std::string err = "Invalid data, failed to decode image: " + data.first; + std::string err = + "Invalid image, failed to decode " + data.first + ", the image is damaged or permission denied."; RETURN_STATUS_UNEXPECTED(err); } if (task_ != taskSuffix) { Status rc_t = Decode(task, &task); if (rc_t.IsError()) { - std::string err_t = "Invalid data, failed to decode image: " + data.second; + std::string err_t = + "Invalid image, failed to decode " + data.second + ", the image is damaged or permission denied."; RETURN_STATUS_UNEXPECTED(err_t); } } @@ -106,8 +109,8 @@ void CityscapesOp::Print(std::ostream &out, bool show_all) const { Status CityscapesOp::PrepareData() { auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data()); if (!real_dataset_dir.has_value()) { - MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_; - RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_); + MS_LOG(ERROR) << "Invalid file path, Cityscapes Dataset dir: " << dataset_dir_ << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, Cityscapes Dataset dir: " + dataset_dir_ + " does not exist."); } Path dataset_dir(real_dataset_dir.value()); @@ -143,15 +146,18 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con Path images_dir_p(images_dir); if (!images_dir_p.IsDirectory()) { - RETURN_STATUS_UNEXPECTED("Invalid path, " + images_dir_p.ToString() + " is an invalid directory path."); + RETURN_STATUS_UNEXPECTED("Invalid path, Cityscapes Dataset image dir: " + images_dir_p.ToString() + + " is not a directory path."); } Path task_dir_p(task_dir); if (!task_dir_p.IsDirectory()) { - RETURN_STATUS_UNEXPECTED("Invalid path, " + task_dir_p.ToString() + " is an invalid directory path."); + RETURN_STATUS_UNEXPECTED("Invalid path, Cityscapes Dataset task dir: " + task_dir_p.ToString() + + " is not a directory path."); } std::shared_ptr d_it = Path::DirIterator::OpenDirectory(&images_dir_p); if (d_it == 
nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + images_dir_p.ToString()); + RETURN_STATUS_UNEXPECTED("Invalid path, failed to open Cityscapes Dataset image directory: " + + images_dir_p.ToString()); } while (d_it->HasNext()) { @@ -165,7 +171,8 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con Path task_city_dir = task_dir_p / city_dir.Basename(); std::shared_ptr img_city_it = Path::DirIterator::OpenDirectory(&img_city_dir); if (img_city_it == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + img_city_dir.ToString()); + RETURN_STATUS_UNEXPECTED("Invalid path, failed to open Cityscapes Dataset image city directory: " + + img_city_dir.ToString()); } while (img_city_it->HasNext()) { @@ -179,13 +186,15 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con Path task_file_path = task_city_dir / (img_file_name.substr(0, img_file_name.find("_leftImg8bit")) + "_" + GetTaskSuffix(task_, real_quality_mode)); if (!task_file_path.Exists()) { - RETURN_STATUS_UNEXPECTED("Invalid file, " + task_file_path.ToString() + " not found."); + RETURN_STATUS_UNEXPECTED("Invalid file, Cityscapes Dataset task file: " + task_file_path.ToString() + + " does not exist."); } image_task_map_[image_file_path.ToString()] = task_file_path.ToString(); } } catch (const std::exception &err) { - RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset: " + dataset_dir_); + RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset from " + dataset_dir_ + ": " + + std::string(err.what())); } } @@ -213,7 +222,9 @@ Status CityscapesOp::CountDatasetInfo() { num_rows_ = static_cast(image_task_pairs_.size()); if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "Invalid data, no valid data matching the dataset API CityscapesDataset. Please check file path or dataset API."); + "Invalid data, no valid data matching the dataset API 'CityscapesDataset'. 
Please check dataset API or file " + "path: " + + dataset_dir_ + "."); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc index 4ea857908f6..cdeee7a2fcd 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc @@ -55,7 +55,7 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector key_c if (cursor.find(key_chain[i]) != cursor.end()) { cursor = cursor[key_chain[i]]; } else { - RETURN_STATUS_UNEXPECTED("Invalid data, in given JSON file, failed to find key: " + key_chain[i]); + RETURN_STATUS_UNEXPECTED("Invalid json file, in given JSON file, failed to find key: " + key_chain[i]); } } std::string final_str = key_chain.back(); @@ -84,13 +84,13 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector key_c Status ClueOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) { auto realpath = FileUtils::GetRealPath(file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file; - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file); + std::string err_msg = "Invalid file path, " + file + " does not exist."; + LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } std::ifstream handle(realpath.value()); if (!handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + file); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file + ", the file is damaged or permission denied."); } int64_t rows_total = 0; @@ -115,7 +115,7 @@ Status ClueOp::LoadFile(const std::string &file, int64_t start_offset, int64_t e js = nlohmann::json::parse(line); } catch (const std::exception &err) { // Catch any exception and convert to Status return code - RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse JSON file: " + file); + RETURN_STATUS_UNEXPECTED("Invalid json, failed to parse " + file + ", " + std::string(err.what())); } int cols_count = cols_to_keyword_.size(); TensorRow t_row(cols_count, nullptr); @@ -219,7 +219,7 @@ Status ClueOp::CalculateNumRowsPerShard() { } std::string file_list = ss.str(); RETURN_STATUS_UNEXPECTED( - "Invalid data, CLUEDataset API can't read the data file (interface mismatch or no data found). " + "Invalid data, 'CLUEDataset' API can't read the data file (interface mismatch or no data found). 
" "Check file path:" + file_list); } @@ -232,13 +232,13 @@ Status ClueOp::CalculateNumRowsPerShard() { int64_t CountTotalRowsPerFile(const std::string &file) { auto realpath = FileUtils::GetRealPath(file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Get real path failed, path=" << file; + MS_LOG(ERROR) << "Invalid file, " << file << " does not exist."; return 0; } std::ifstream handle(realpath.value()); if (!handle.is_open()) { - MS_LOG(ERROR) << "Invalid file, failed to open file: " << file; + MS_LOG(ERROR) << "Invalid file, failed to open " << file << ": the file is damaged or permission denied."; return 0; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc index 791b5069864..16003ada4ce 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc @@ -78,8 +78,8 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { std::shared_ptr image, coordinate; auto itr = coordinate_map_.find(image_id); if (itr == coordinate_map_.end()) { - RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + - " in annotation node is not found in image node in JSON file."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id + + " is missing from image node in annotation file: " + annotation_path_); } std::string kImageFile = image_folder_path_ + std::string("/") + image_id; @@ -115,7 +115,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { } else if (task_type_ == TaskType::Panoptic) { RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow)); } else { - RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff or Panoptic."); + RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic."); } return Status::OK(); @@ -128,8 +128,8 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima std::vector iscrowd_row; auto itr_item = simple_item_map_.find(image_id); if (itr_item == simple_item_map_.end()) { - RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + - " in annotation node is not found in image node in JSON file."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id + + " is missing in the node of image from annotation file: " + annotation_path_ + "."); } std::vector annotation = itr_item->second; @@ -153,7 +153,7 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima std::string img_id; size_t pos = image_id.find("."); if (pos == std::string::npos) { - RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\""); + RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\""); } std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id)); std::shared_ptr filename; @@ -171,8 +171,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_ std::vector item_queue; auto itr_item = simple_item_map_.find(image_id); if (itr_item == simple_item_map_.end()) { - RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + - " in annotation node is not found in image node in JSON file."); + RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id + + " is missing in the node 
of 'image' from annotation file: " + annotation_path_); } item_queue = itr_item->second; @@ -186,7 +186,7 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_ std::string img_id; size_t pos = image_id.find("."); if (pos == std::string::npos) { - RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\""); + RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\""); } std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id)); std::shared_ptr filename; @@ -206,8 +206,8 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::vector area_row; auto itr_item = simple_item_map_.find(image_id); if (itr_item == simple_item_map_.end()) { - RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id + - " in annotation node is not found in image node in JSON file."); + RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id + + " is missing in the node of 'image' from annotation file: " + annotation_path_); } std::vector annotation = itr_item->second; @@ -237,7 +237,7 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::string img_id; size_t pos = image_id.find("."); if (pos == std::string::npos) { - RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\""); + RETURN_STATUS_UNEXPECTED("Invalid image, " + image_id + " should be with suffix like \".jpg\""); } std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id)); std::shared_ptr filename; @@ -252,7 +252,9 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, template Status CocoOp::SearchNodeInJson(const nlohmann::json &input_tree, std::string node_name, T *output_node) { auto node = input_tree.find(node_name); - CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid data, required node not found in JSON: " + node_name); + CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid annotation, the attribute of '" + node_name + + "' is missing in annotation file: " + annotation_path_ + + "."); (*output_node) = *node; return Status::OK(); } @@ -262,17 +264,19 @@ Status CocoOp::PrepareData() { try { auto realpath = FileUtils::GetRealPath(annotation_path_.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << annotation_path_; - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + annotation_path_); + std::string err_msg = "Invalid file path, Coco Dataset annotation file: " + annotation_path_ + " does not exist."; + LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); } std::ifstream in(realpath.value()); if (!in.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open annotation file: " + annotation_path_); + RETURN_STATUS_UNEXPECTED("Invalid annotation file, Coco Dataset annotation file: " + annotation_path_ + + " open failed, permission denied!"); } in >> js; } catch (const std::exception &err) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open JSON file: " + annotation_path_ + "."); + RETURN_STATUS_UNEXPECTED("Invalid annotation file, Coco Dataset annotation file:" + annotation_path_ + + " load failed, error description: " + std::string(err.what())); } std::vector image_que; @@ -292,8 +296,8 @@ Status CocoOp::PrepareData() { RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonAnnoImageId), &image_id)); 
auto itr_file = image_index_.find(image_id); if (itr_file == image_index_.end()) { - RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + std::to_string(image_id) + - " in annotation node is not found in image node in JSON file."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + std::to_string(image_id) + + " is missing in the node of 'image' from annotation file: " + annotation_path_); } file_name = itr_file->second; switch (task_type_) { @@ -313,7 +317,7 @@ Status CocoOp::PrepareData() { RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id)); break; default: - RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic."); + RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic."); } } for (auto img : image_que) { @@ -322,7 +326,7 @@ Status CocoOp::PrepareData() { num_rows_ = image_ids_.size(); if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "Invalid data, CocoDataset API can't read the data file (interface mismatch or no data found). " + "Invalid data, 'CocoDataset' API can't read the data file (interface mismatch or no data found). " "Check file in directory: " + image_folder_path_ + "."); } @@ -331,7 +335,8 @@ Status CocoOp::PrepareData() { Status CocoOp::ImageColumnLoad(const nlohmann::json &image_tree, std::vector *image_vec) { if (image_tree.size() == 0) { - RETURN_STATUS_UNEXPECTED("Invalid data, no \"image\" node found in JSON file: " + annotation_path_ + "."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'image' node is missing in annotation file: " + annotation_path_ + + "."); } for (auto img : image_tree) { std::string file_name; @@ -354,8 +359,8 @@ Status CocoOp::DetectionColumnLoad(const nlohmann::json &annotation_tree, const RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCategoryId), &category_id)); auto search_category = category_set_.find(category_id); if (search_category == category_set_.end()) - RETURN_STATUS_UNEXPECTED( - "Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id) + "."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'category_id': " + std::to_string(category_id) + + " is missing in the node of 'categories' from annotation file: " + annotation_path_); auto node_iscrowd = annotation_tree.find(kJsonAnnoIscrowd); if (node_iscrowd != annotation_tree.end()) iscrowd = *node_iscrowd; bbox.insert(bbox.end(), node_bbox.begin(), node_bbox.end()); @@ -392,13 +397,13 @@ Status CocoOp::KeypointColumnLoad(const nlohmann::json &annotation_tree, const s const int32_t &unique_id) { auto itr_num_keypoint = annotation_tree.find(kJsonAnnoNumKeypoints); if (itr_num_keypoint == annotation_tree.end()) - RETURN_STATUS_UNEXPECTED( - "Invalid data, no num_keypoint found in annotation file where image_id: " + std::to_string(unique_id) + "."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'num_keypoint' node is missing in annotation file: " + + annotation_path_ + " where 'image_id': " + std::to_string(unique_id) + "."); simple_item_map_[image_file].push_back(*itr_num_keypoint); auto itr_keypoint = annotation_tree.find(kJsonAnnoKeypoints); if (itr_keypoint == annotation_tree.end()) - RETURN_STATUS_UNEXPECTED( - "Invalid data, no keypoint found in annotation file where image_id: " + std::to_string(unique_id) + "."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'keypoint' node is missing in annotation file: " + + annotation_path_ + " where 
'image_id': " + std::to_string(unique_id) + "."); coordinate_map_[image_file].push_back(*itr_keypoint); return Status::OK(); } @@ -407,31 +412,34 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s const int32_t &image_id) { auto itr_segments = annotation_tree.find(kJsonAnnoSegmentsInfo); if (itr_segments == annotation_tree.end()) - RETURN_STATUS_UNEXPECTED( - "Invalid data, no segments_info found in annotation file where image_id: " + std::to_string(image_id) + "."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'segments_info' node is missing in annotation file: " + + annotation_path_ + " where 'image_id': " + std::to_string(image_id) + "."); for (auto info : *itr_segments) { std::vector bbox; uint32_t category_id = 0; auto itr_bbox = info.find(kJsonAnnoBbox); if (itr_bbox == info.end()) - RETURN_STATUS_UNEXPECTED("Invalid data, no bbox found in segments_info(in annotation file) where image_id: " + - std::to_string(image_id) + "."); + RETURN_STATUS_UNEXPECTED( + "Invalid annotation, the 'bbox' attribute is missing in the node of 'segments_info' where 'image_id': " + + std::to_string(image_id) + " from annotation file: " + annotation_path_ + "."); bbox.insert(bbox.end(), itr_bbox->begin(), itr_bbox->end()); coordinate_map_[image_file].push_back(bbox); RETURN_IF_NOT_OK(SearchNodeInJson(info, std::string(kJsonAnnoCategoryId), &category_id)); auto search_category = category_set_.find(category_id); if (search_category == category_set_.end()) - RETURN_STATUS_UNEXPECTED( - "Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id) + "."); + RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'category_id': " + std::to_string(category_id) + + " is missing in the node of 'categories' from " + annotation_path_ + "."); auto itr_iscrowd = info.find(kJsonAnnoIscrowd); if (itr_iscrowd == info.end()) RETURN_STATUS_UNEXPECTED( - "Invalid data, no iscrowd found in segments_info where image_id: " + std::to_string(image_id) + "."); + "Invalid annotation, the attribute of 'iscrowd' is missing in the node of 'segments_info' where 'image_id': " + + std::to_string(image_id) + " from annotation file: " + annotation_path_ + "."); auto itr_area = info.find(kJsonAnnoArea); if (itr_area == info.end()) RETURN_STATUS_UNEXPECTED( - "Invalid data, no area found in segments_info where image_id: " + std::to_string(image_id) + "."); + "Invalid annotation, the attribute of 'area' is missing in the node of 'segments_info' where 'image_id': " + + std::to_string(image_id) + " from annotation file: " + annotation_path_ + "."); simple_item_map_[image_file].push_back(category_id); simple_item_map_[image_file].push_back(*itr_iscrowd); simple_item_map_[image_file].push_back(*itr_area); @@ -441,7 +449,8 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) { if (categories_tree.size() == 0) { - RETURN_STATUS_UNEXPECTED("Invalid data, no categories found in annotation_path: " + annotation_path_); + RETURN_STATUS_UNEXPECTED( + "Invalid annotation, the 'categories' node is missing in annotation file: " + annotation_path_ + "."); } for (auto category : categories_tree) { int32_t id = 0; @@ -449,7 +458,9 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) { std::vector label_info; auto itr_id = category.find(kJsonId); if (itr_id == category.end()) { - RETURN_STATUS_UNEXPECTED("Invalid data, no JSON id found in 
categories of " + annotation_path_); + RETURN_STATUS_UNEXPECTED( + "Invalid annotation, the attribute of 'id' is missing in the node of 'categories' from annotation file: " + + annotation_path_); } id = *itr_id; label_info.push_back(id); @@ -458,13 +469,16 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) { auto itr_name = category.find(kJsonCategoriesName); CHECK_FAIL_RETURN_UNEXPECTED( itr_name != category.end(), - "Invalid data, no categories name found in categories where id: " + std::to_string(id)); + "Invalid annotation, the attribute of 'name' is missing in the node of 'categories' where 'id': " + + std::to_string(id)); name = *itr_name; if (task_type_ == TaskType::Panoptic) { auto itr_isthing = category.find(kJsonCategoriesIsthing); CHECK_FAIL_RETURN_UNEXPECTED(itr_isthing != category.end(), - "Invalid data, nothing found in categories of " + annotation_path_); + "Invalid annotation, the attribute of 'isthing' is missing in the node of " + "'categories' from annotation file: " + + annotation_path_); label_info.push_back(*itr_isthing); } label_index_.emplace_back(std::make_pair(name, label_info)); @@ -477,7 +491,8 @@ Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &c if (decode_ == true) { Status rc = Decode(*tensor, tensor); - CHECK_FAIL_RETURN_UNEXPECTED(rc.IsOk(), "Invalid data, failed to decode image: " + path); + CHECK_FAIL_RETURN_UNEXPECTED( + rc.IsOk(), "Invalid image, failed to decode " + path + ": the image is broken or permission denied."); } return Status::OK(); } @@ -505,8 +520,8 @@ Status CocoOp::GetClassIndexing(std::vector t; if (cur_col_ >= column_default_.size()) { - err_message_ = "Number of file columns does not match the default records"; + std::stringstream ss; + ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', " + << "but got the size of column_names: " << cur_col_ + << ", the size of column_defaults : " << column_default_.size() << "."; + err_message_ = ss.str(); return -1; } Status rc; @@ -139,7 +143,11 @@ int CsvOp::CsvParser::PutRecord(int c) { break; } if (cur_col_ >= cur_row_.size()) { - err_message_ = "Number of file columns does not match the tensor table"; + std::stringstream ss; + ss << "Invalid columns, the size of column_names should be greater than or equal to the size of columns of " + << "loading data, but got the size of column_names: " << cur_col_ + << ", the size of columns in original loaded dataset: " << column_default_.size() << "."; + err_message_ = ss.str(); return -1; } cur_row_[cur_col_] = std::move(t); @@ -166,7 +174,11 @@ int CsvOp::CsvParser::PutRow(int c) { } if (cur_col_ != column_default_.size()) { - err_message_ = "The number of columns does not match the definition."; + std::stringstream ss; + ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', " + << "but got the size of column_names: " << cur_col_ + << ", the size of 'column_defaults': " << column_default_.size() << "."; + err_message_ = ss.str(); return -1; } @@ -201,11 +213,11 @@ int CsvOp::CsvParser::EndFile(int c) { int CsvOp::CsvParser::CatchException(int c) { if (GetMessage(c) == Message::MS_QUOTE && cur_state_ == State::UNQUOTE) { - err_message_ = "Invalid quote in unquote field."; + err_message_ = "Invalid csv file, unexpected quote in unquote field from " + file_path_ + "."; } else if (GetMessage(c) == Message::MS_END_OF_FILE && cur_state_ == State::QUOTE) { - err_message_ = "Reach the end of file in quote 
field."; + err_message_ = "Invalid csv file, reach the end of file in quote field, check " + file_path_ + "."; } else if (GetMessage(c) == Message::MS_NORMAL && cur_state_ == State::SECOND_QUOTE) { - err_message_ = "Receive unquote char in quote field."; + err_message_ = "Invalid csv file, receive unquote char in quote field, check " + file_path_ + "."; } return -1; } @@ -459,14 +471,14 @@ Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t en auto realpath = FileUtils::GetRealPath(file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << file; - RETURN_STATUS_UNEXPECTED("Invalid file, " + DatasetName() + " file get real path failed, path=" + file); + MS_LOG(ERROR) << "Invalid file path, " << file << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + file + " does not exist."); } std::ifstream ifs; ifs.open(realpath.value(), std::ifstream::in); if (!ifs.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + " file: " + file); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file + ", the file is damaged or permission denied."); } if (column_name_list_.empty()) { std::string tmp; @@ -483,17 +495,18 @@ Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t en if (err != 0) { // if error code is -2, the returned error is interrupted if (err == -2) return Status(kMDInterrupted); - RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse file: " + file + ": line " + + RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse csv file: " + file + " at line " + std::to_string(csv_parser.GetTotalRows() + 1) + ". Error message: " + csv_parser.GetErrorMessage()); } } } catch (std::invalid_argument &ia) { std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1); - RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ": line " + err_row + ", type does not match."); + RETURN_STATUS_UNEXPECTED("Invalid csv, csv file: " + file + " parse failed at line " + err_row + + ", type does not match."); } catch (std::out_of_range &oor) { std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1); - RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ": line " + err_row + ", value out of range."); + RETURN_STATUS_UNEXPECTED("Invalid csv, " + file + " parse failed at line " + err_row + " : value out of range."); } return Status::OK(); } @@ -594,13 +607,14 @@ int64_t CsvOp::CountTotalRows(const std::string &file) { CsvParser csv_parser(0, jagged_rows_connector_.get(), field_delim_, column_default_list_, file); Status rc = csv_parser.InitCsvParser(); if (rc.IsError()) { - MS_LOG(ERROR) << "[Internal ERROR], failed to initialize " + DatasetName(true) + " Parser. Error:" << rc; + MS_LOG(ERROR) << "[Internal ERROR], failed to initialize " + DatasetName(true) + " Parser. 
Error description:" + << rc; return 0; } auto realpath = FileUtils::GetRealPath(file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << file; + MS_LOG(ERROR) << "Invalid file path, csv file: " << file << " does not exist."; return 0; } @@ -673,8 +687,8 @@ Status CsvOp::ComputeColMap() { /* Process exception if ERROR in column name solving*/ if (!rc.IsOk()) { - MS_LOG(ERROR) << "Invalid file, fail to analyse column name map, path=" + csv_file; - RETURN_STATUS_UNEXPECTED("Invalid file, fail to analyse column name map, path=" + csv_file); + MS_LOG(ERROR) << "Invalid file, failed to get column name list from csv file: " + csv_file; + RETURN_STATUS_UNEXPECTED("Invalid file, failed to get column name list from csv file: " + csv_file); } } } else { @@ -689,9 +703,10 @@ Status CsvOp::ComputeColMap() { if (column_default_list_.size() != column_name_id_map_.size()) { RETURN_STATUS_UNEXPECTED( - "Invalid parameter, the number of column names does not match the default column, size of default column_list: " + + "Invalid parameter, the size of column_names should be equal to the size of 'column_defaults', but got " + " size of 'column_defaults': " + std::to_string(column_default_list_.size()) + - ", size of column_name: " + std::to_string(column_name_id_map_.size())); + ", size of column_names: " + std::to_string(column_name_id_map_.size())); } return Status::OK(); @@ -703,7 +718,7 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) { if (!check_flag_) { auto realpath = FileUtils::GetRealPath(csv_file_name.data()); if (!realpath.has_value()) { - std::string err_msg = "Invalid file, " + DatasetName() + " file get real path failed, path=" + csv_file_name; + std::string err_msg = "Invalid file path, csv file: " + csv_file_name + " does not exist."; MS_LOG(ERROR) << err_msg; RETURN_STATUS_UNEXPECTED(err_msg); } @@ -721,11 +736,9 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) { if (column_name_id_map_.find(col_names[i]) == column_name_id_map_.end()) { column_name_id_map_[col_names[i]] = i; } else { - MS_LOG(ERROR) << "Invalid parameter, duplicate column names are not allowed: " + col_names[i] + - ", The corresponding data files: " + csv_file_name; - - RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " + col_names[i] + - ", The corresponding data files: " + csv_file_name); + MS_LOG(ERROR) << "Invalid parameter, duplicate column " << col_names[i] << " for csv file: " << csv_file_name; + RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column " + col_names[i] + + " for csv file: " + csv_file_name); } } check_flag_ = true; @@ -736,11 +749,10 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) { if (column_name_id_map_.find(column_name_list_[i]) == column_name_id_map_.end()) { column_name_id_map_[column_name_list_[i]] = i; } else { - MS_LOG(ERROR) << "Invalid parameter, duplicate column names are not allowed: " + column_name_list_[i] + - ", The corresponding data files: " + csv_file_name; - - RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " + - column_name_list_[i] + ", The corresponding data files: " + csv_file_name); + MS_LOG(ERROR) << "Invalid parameter, duplicate column " << column_name_list_[i] + << " for csv file: " << csv_file_name << "."; + RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column " + column_name_list_[i] + + " for csv file: " + csv_file_name + "."); } } check_flag_ = true; @@ -764,7 
+776,7 @@ bool CsvOp::ColumnNameValidate() { for (auto &csv_file : csv_files_list_) { auto realpath = FileUtils::GetRealPath(csv_file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << csv_file; + MS_LOG(ERROR) << "Invalid file path, csv file: " << csv_file << " does not exist."; return false; } @@ -781,9 +793,8 @@ bool CsvOp::ColumnNameValidate() { match_file = csv_file; } else { // Case the other files if (col_names != record) { - MS_LOG(ERROR) - << "Invalid parameter, every corresponding column name must be identical, either element or permutation. " - << "Invalid files are: " + match_file + " and " + csv_file; + MS_LOG(ERROR) << "Invalid parameter, every column name should be equal the record from csv, but got column: " + << col_names << ", csv record: " << record << ". Check " + match_file + " and " + csv_file + "."; return false; } } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/div2k_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/div2k_op.cc index 98294c85f04..a54a5226442 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/div2k_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/div2k_op.cc @@ -76,13 +76,15 @@ Status DIV2KOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { if (decode_ == true) { Status hr_rc = Decode(hr_image, &hr_image); if (hr_rc.IsError()) { - std::string err = "Invalid data, failed to decode image: " + data.first; + std::string err = + "Invalid image, failed to decode " + data.first + ", the image is damaged or permission denied."; RETURN_STATUS_UNEXPECTED(err); } Status lr_rc = Decode(lr_image, &lr_image); if (lr_rc.IsError()) { - std::string err = "Invalid data, failed to decode image: " + data.second; + std::string err = + "Invalid image, failed to decode " + data.second + ", the image is damaged or permission denied."; RETURN_STATUS_UNEXPECTED(err); } } @@ -141,7 +143,7 @@ Status DIV2KOp::GetDIV2KLRDirRealName(const std::string &hr_dir_key, const std:: out_str += ("\t" + item.first + ": " + item.second + ",\n"); }); out_str += "\n}"; - RETURN_STATUS_UNEXPECTED("Invalid param, " + lr_dir_key + " not found in DatasetPramMap: \n" + out_str); + RETURN_STATUS_UNEXPECTED("Invalid param, dir: " + lr_dir_key + " not found under div2k dataset dir, " + out_str); } if (downgrade_2017.find(downgrade_) != downgrade_2017.end() && scale_2017.find(scale_) != scale_2017.end()) { @@ -158,8 +160,8 @@ Status DIV2KOp::GetDIV2KDataByUsage() { auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data()); if (!real_dataset_dir.has_value()) { - MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_; - RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_); + MS_LOG(ERROR) << "Invalid file path, div2k dataset dir: " << dataset_dir_ << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, div2k dataset dir: " + dataset_dir_ + " does not exist."); } Path dataset_dir(real_dataset_dir.value()); @@ -167,14 +169,15 @@ Status DIV2KOp::GetDIV2KDataByUsage() { Path lr_images_dir = dataset_dir / lr_dir_real_name_; if (!hr_images_dir.IsDirectory()) { - RETURN_STATUS_UNEXPECTED("Invalid path, " + hr_images_dir.ToString() + " is an invalid directory path."); + RETURN_STATUS_UNEXPECTED("Invalid path, div2k hr image dir: " + hr_images_dir.ToString() + " is not a directory."); } if (!lr_images_dir.IsDirectory()) { - RETURN_STATUS_UNEXPECTED("Invalid path, " + lr_images_dir.ToString() + " is 
an invalid directory path."); + RETURN_STATUS_UNEXPECTED("Invalid path, div2k lr image dir: " + lr_images_dir.ToString() + " is not a directory."); } auto hr_it = Path::DirIterator::OpenDirectory(&hr_images_dir); if (hr_it == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + hr_images_dir.ToString()); + RETURN_STATUS_UNEXPECTED("Invalid path, failed to open div2k hr image dir: " + hr_images_dir.ToString() + + ", permission denied."); } std::string image_name; @@ -202,12 +205,14 @@ Status DIV2KOp::GetDIV2KDataByUsage() { Path lr_image_file_path(lr_image_file_path_); if (!lr_image_file_path.Exists()) { - RETURN_STATUS_UNEXPECTED("Invalid file, " + lr_image_file_path.ToString() + " not found."); + RETURN_STATUS_UNEXPECTED("Invalid file, div2k image file: " + lr_image_file_path.ToString() + + " does not exist."); } image_hr_lr_map_[hr_image_file_path.ToString()] = lr_image_file_path.ToString(); } catch (const std::exception &err) { - RETURN_STATUS_UNEXPECTED("Invalid path, failed to load DIV2K Dataset: " + dataset_dir_); + RETURN_STATUS_UNEXPECTED("Invalid path, failed to load DIV2K Dataset from " + dataset_dir_ + ": " + + std::string(err.what())); } } for (auto item : image_hr_lr_map_) { @@ -220,7 +225,8 @@ Status DIV2KOp::CountDatasetInfo() { num_rows_ = static_cast(image_hr_lr_pairs_.size()); if (num_rows_ == 0) { RETURN_STATUS_UNEXPECTED( - "Invalid data, no valid data matching the dataset API DIV2KDataset. Please check file path or dataset API."); + "Invalid data, no valid data matching the dataset API 'DIV2KDataset'. Please check dataset API or file path: " + + dataset_dir_ + "."); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/emnist_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/emnist_op.cc index fa0fb26049f..4c4d19cce63 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/emnist_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/emnist_op.cc @@ -58,11 +58,12 @@ Status EMnistOp::WalkAllFiles() { const std::string train_prefix = "-train"; const std::string test_prefix = "-test"; auto realpath = FileUtils::GetRealPath(folder_path_.data()); - CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Get real path failed: " + folder_path_); + CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Invalid file path, " + folder_path_ + " does not exist."); Path dir(realpath.value()); auto dir_it = Path::DirIterator::OpenDirectory(&dir); if (dir_it == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + dir.ToString()); + RETURN_STATUS_UNEXPECTED("Invalid path, failed to open emnist dataset dir: " + dir.ToString() + + ", the directory is not a directory or permission denied."); } std::string prefix; prefix = "emnist-" + name_; // used to match usage == "all". 
@@ -88,7 +89,9 @@ Status EMnistOp::WalkAllFiles() { std::sort(image_names_.begin(), image_names_.end()); std::sort(label_names_.begin(), label_names_.end()); CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(), - "Invalid data, num of images is not equal to num of labels."); + "Invalid data, num of image files should be equal to num of label files under " + + realpath.value() + ", but got num of images: " + std::to_string(image_names_.size()) + + ", num of labels: " + std::to_string(label_names_.size()) + "."); return Status::OK(); } @@ -118,12 +121,12 @@ Status EMnistOp::CountTotalRows(const std::string &dir, const std::string &name, for (size_t i = 0; i < op->image_names_.size(); ++i) { std::ifstream image_reader; image_reader.open(op->image_names_[i], std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), - "Invalid file, failed to open image file: " + op->image_names_[i]); + CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), "Invalid file, failed to open " + op->image_names_[i] + + ": the image file is damaged or permission denied."); std::ifstream label_reader; label_reader.open(op->label_names_[i], std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), - "Invalid file, failed to open label file: " + op->label_names_[i]); + CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), "Invalid file, failed to open " + op->label_names_[i] + + ": the label file is damaged or permission denied."); uint32_t num_images; Status s = op->CheckImage(op->image_names_[i], &image_reader, &num_images); image_reader.close(); @@ -134,8 +137,10 @@ Status EMnistOp::CountTotalRows(const std::string &dir, const std::string &name, label_reader.close(); RETURN_IF_NOT_OK(s); - CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), - "Invalid data, num of images is not equal to num of labels."); + CHECK_FAIL_RETURN_UNEXPECTED( + (num_images == num_labels), + "Invalid data, num of images should be equal to num of labels, but got num of images: " + + std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + "."); *count = *count + num_images; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/fake_image_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/fake_image_op.cc index 08e43958ab9..3afdc01e569 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/fake_image_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/fake_image_op.cc @@ -90,7 +90,8 @@ void FakeImageOp::Print(std::ostream &out, bool show_all) const { Status FakeImageOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || label_list_.empty()) { if (label_list_.empty()) { - RETURN_STATUS_UNEXPECTED("No image found in dataset. Check if image was generated successfully."); + RETURN_STATUS_UNEXPECTED( + "[Internal ERROR] No image found in dataset. 
Check if image was generated successfully."); } else { RETURN_STATUS_UNEXPECTED( "[Internal ERROR] Map for storing image-index pair is nullptr or has been set in other place, " @@ -126,7 +127,7 @@ Status FakeImageOp::PrepareData() { label_list_.shrink_to_fit(); num_rows_ = label_list_.size(); - CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "Generate image failed, please check dataset API."); + CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "Invalid data, generate fake data failed, please check dataset API."); image_tensor_.clear(); image_tensor_.resize(num_rows_); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/fashion_mnist_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/fashion_mnist_op.cc index 6fed946a95c..3dfe5e054db 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/fashion_mnist_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/fashion_mnist_op.cc @@ -56,12 +56,12 @@ Status FashionMnistOp::CountTotalRows(const std::string &dir, const std::string for (size_t i = 0; i < op->image_names_.size(); ++i) { std::ifstream image_reader; image_reader.open(op->image_names_[i], std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), - "Invalid file, failed to open image file: " + op->image_names_[i]); + CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), "Invalid file, failed to open " + op->image_names_[i] + + ": the image file is damaged or permission denied."); std::ifstream label_reader; label_reader.open(op->label_names_[i], std::ios::binary); - CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), - "Invalid file, failed to open label file: " + op->label_names_[i]); + CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), "Invalid file, failed to open " + op->label_names_[i] + + ": the label file is damaged or permission denied."); uint32_t num_images; Status s = op->CheckImage(op->image_names_[i], &image_reader, &num_images); image_reader.close(); @@ -72,8 +72,10 @@ Status FashionMnistOp::CountTotalRows(const std::string &dir, const std::string label_reader.close(); RETURN_IF_NOT_OK(s); - CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), - "Invalid data, num of images is not equal to num of labels."); + CHECK_FAIL_RETURN_UNEXPECTED( + (num_images == num_labels), + "Invalid data, num of images should be equal to num of labels, but got num of images: " + + std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + "."); *count = *count + num_images; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc index 405602f81a7..0f8e85411e9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc @@ -48,7 +48,8 @@ Status FlickrOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { if (decode_ == true) { Status rc = Decode(image, &image); if (rc.IsError()) { - std::string err = "Invalid data, failed to decode image: " + data.first; + std::string err = + "Invalid image, failed to decode " + data.first + ": the image is damaged or permission denied!"; RETURN_STATUS_UNEXPECTED(err); } } @@ -76,13 +77,14 @@ void FlickrOp::Print(std::ostream &out, bool show_all) const { Status FlickrOp::PrepareData() { auto real_file_path = FileUtils::GetRealPath(file_path_.data()); if (!real_file_path.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, 
path=" << file_path_; - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_path_); + MS_LOG(ERROR) << "Invalid file path, " << file_path_ << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_path_ + " does not exist."); } std::ifstream file_handle(real_file_path.value()); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_); + RETURN_STATUS_UNEXPECTED("Invalid annotation file, failed to open " + file_path_ + + " : the file is damaged or permission denied."); } std::string line; @@ -102,16 +104,16 @@ Status FlickrOp::PrepareData() { image_name = line.substr(0, flag_idx - 2); // -2 because "#[0-4]\t" if (image_name.empty()) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, image_name is not found in Flickr annotation file: " + file_path_ + - "; line: " + line); + RETURN_STATUS_UNEXPECTED("Invalid file, the attribute of image_name is missing in flickr dataset file: " + + file_path_ + ", line: " + line); } image_file_path = (dataset_dir / image_name).ToString(); std::string annotation = line.substr(flag_idx + 1); if (annotation.empty()) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, annotation is not found in Flickr annotation file: " + file_path_ + - "; line: " + line); + RETURN_STATUS_UNEXPECTED("Invalid file, the attribute of annotation is missing in flickr dataset file: " + + file_path_ + ", line: " + line); } bool valid = false; @@ -127,7 +129,8 @@ Status FlickrOp::PrepareData() { image_annotation_map_[image_file_path].emplace_back(annotation); } catch (const std::exception &err) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse flickr dataset file: " + file_path_ + ": " + + std::string(err.what())); } } @@ -146,8 +149,8 @@ Status FlickrOp::PrepareData() { Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) { auto real_file_name = FileUtils::GetRealPath(file_name.data()); if (!real_file_name.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name; - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name); + MS_LOG(ERROR) << "Invalid file path, flickr dataset file: " << file_name << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, flickr dataset file: " + file_name + " does not exist."); } std::ifstream file_handle; @@ -155,14 +158,16 @@ Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) { *valid = false; file_handle.open(real_file_name.value(), std::ios::binary | std::ios::in); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid flickr file, failed to open " + file_name + + ": the file is damaged or permission denied."); } unsigned char file_type[read_num]; (void)file_handle.read(reinterpret_cast(file_type), read_num); if (file_handle.fail()) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid flickr file, failed to read " + file_name + + ": the file is damaged or the file content is incomplete."); } file_handle.close(); if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc index ca7d3ab0276..0d9545574c1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/generator_op.cc @@ -64,7 +64,7 @@ Status GeneratorOp::CreateGeneratorObject() { // Acquire Python GIL py::gil_scoped_acquire gil_acquire; if (Py_IsInitialized() == 0) { - return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized."); + return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized."); } try { py::array sample_ids; @@ -93,15 +93,17 @@ Status GeneratorOp::Init() { Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row) { if (!py::isinstance(py_data)) { return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__, - "Invalid data, Generator should return a tuple of NumPy arrays, currently returned is not a tuple."); + "Invalid python function, the 'source' of 'GeneratorDataset' should return a tuple of NumPy arrays, " + "but got " + + std::string(py_data.get_type().str())); } py::tuple py_row = py_data.cast(); // Check if returned number of columns matches with column names if (py_row.size() != column_names_.size()) { return Status( StatusCode::kMDPyFuncException, __LINE__, __FILE__, - "Invalid data, Generator should return same number of NumPy arrays as specified in column_names, the size of" - " column_names is:" + + "Invalid python function, the 'source' of 'GeneratorDataset' should return same number of NumPy arrays as " + "specified in column_names, the size of column_names is:" + std::to_string(column_names_.size()) + " and number of returned NumPy array is:" + std::to_string(py_row.size())); } @@ -110,15 +112,18 @@ Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row) py::object ret_py_ele = py_row[i]; if (!py::isinstance(ret_py_ele)) { return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__, - "Invalid data, Generator should return a tuple of NumPy arrays. 
Ensure each item in tuple that " - "returned by source function of GeneratorDataset be NumPy array."); + "Invalid python function, 'GeneratorDataset' should return a tuple of NumPy arrays, but got " + + std::string(ret_py_ele.get_type().str())); } std::shared_ptr tensor; RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast(), &tensor)); if ((!column_types_.empty()) && (column_types_[i] != DataType::DE_UNKNOWN) && (column_types_[i] != tensor->type())) { return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__, - "Invalid data, type of returned data in GeneratorDataset is not same with specified column_types."); + "Invalid python function, type of returned data in 'GeneratorDataset' should be same with " + "specified column_types, but the type of returned data: " + + std::string(ret_py_ele.get_type().str()) + + ", specified column type: " + column_types_[i].ToString()); } tensor_row->push_back(tensor); } @@ -173,7 +178,7 @@ Status GeneratorOp::operator()() { { py::gil_scoped_acquire gil_acquire; if (Py_IsInitialized() == 0) { - return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); + return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); } try { #ifndef ENABLE_SECURITY diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc index 76da5f3e127..ff94c74f920 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc @@ -93,7 +93,8 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { if (decode_ == true) { Status rc = Decode(image, &image); if (rc.IsError()) { - std::string err = "Invalid data, failed to decode image: " + folder_path_ + (pair_ptr->first); + std::string err = "Invalid image, " + folder_path_ + (pair_ptr->first) + + " decode failed, the image is broken or permission denied."; RETURN_STATUS_UNEXPECTED(err); } } @@ -121,7 +122,7 @@ void ImageFolderOp::Print(std::ostream &out, bool show_all) const { Status ImageFolderOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { if (image_label_pairs_.empty()) { - RETURN_STATUS_UNEXPECTED("Invalid data, " + DatasetName(true) + + RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + DatasetName(true) + "Dataset API can't read the data file(interface mismatch or no data found). Check " + DatasetName() + " file path: " + folder_path_); } else { @@ -156,7 +157,7 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { Path folder(folder_path_ + folder_name); std::shared_ptr dirItr = Path::DirIterator::OpenDirectory(&folder); if (folder.Exists() == false || dirItr == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + folder_name); + RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + folder_name + " does not exist or permission denied."); } std::set imgs; // use this for ordering while (dirItr->HasNext()) { @@ -193,8 +194,8 @@ Status ImageFolderOp::RecursiveWalkFolder(Path *dir) { RETURN_IF_NOT_OK(folder_name_queue_->EmplaceBack(subdir.ToString().substr(dirname_offset_))); } if (recursive_ == true) { - MS_LOG(ERROR) << "RecursiveWalkFolder(&subdir) functionality is disabled permanently. 
No recursive walk of " - << "directory will be performed."; + MS_LOG(ERROR) << "[Internal ERROR] RecursiveWalkFolder(&subdir) functionality is disabled permanently. " + << "No recursive walk of directory will be performed."; } } } @@ -206,7 +207,7 @@ Status ImageFolderOp::StartAsyncWalk() { TaskManager::FindMe()->Post(); Path dir(folder_path_); if (dir.Exists() == false || dir.IsDirectory() == false) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + folder_path_); + RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + folder_path_ + " may not exist or the path is not a directory."); } dirname_offset_ = folder_path_.length(); RETURN_IF_NOT_OK(RecursiveWalkFolder(&dir)); @@ -242,10 +243,9 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se std::string err_msg = ""; int64_t row_cnt = 0; err_msg += (dir.Exists() == false || dir.IsDirectory() == false) - ? "Invalid parameter, input path is invalid or not set, path: " + path + ? "Invalid dataset_dir, " + path + " does not exist or the path is not a directory. " : ""; - err_msg += - (num_classes == nullptr && num_rows == nullptr) ? "Invalid parameter, num_class and num_rows are null.\n" : ""; + err_msg += (num_classes == nullptr && num_rows == nullptr) ? "[Internal ERROR] num_class and num_rows are null." : ""; if (err_msg.empty() == false) { RETURN_STATUS_UNEXPECTED(err_msg); } @@ -266,7 +266,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se } else { for (const auto &p : class_index) { CHECK_FAIL_RETURN_UNEXPECTED(folder_names.find(p.first) != folder_names.end(), - "Invalid parameter, folder: " + p.first + " doesn't exist in " + path + " ."); + "Invalid subdirectory, class: " + p.first + " doesn't exist in " + path + " ."); } (*num_classes) = class_index.size(); } @@ -277,7 +277,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se Path subdir(folder_paths.front()); dir_itr = Path::DirIterator::OpenDirectory(&subdir); if (subdir.Exists() == false || dir_itr == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.ToString()); + RETURN_STATUS_UNEXPECTED("Invalid subdirectory, ImageFolder Dataset subdirectory: " + subdir.ToString() + + " does not exist or permission denied"); } while (dir_itr->HasNext()) { if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc index fc5fac10524..e2bf6714fff 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/io_block.cc @@ -64,7 +64,7 @@ FilenameBlock::FilenameBlock(IOBlockFlags io_block_flags) // Gets the filename from the block using the provided index container Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj &index) const { if (out_filename == nullptr) { - RETURN_STATUS_UNEXPECTED("Failed to get filename from FilenameBlock."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to get filename from FilenameBlock."); } // a FilenameBlock only has one key. 
Call base class method to fetch that key @@ -77,7 +77,7 @@ Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj< auto &it = r.first; *out_filename = it.value(); } else { - RETURN_STATUS_UNEXPECTED("Could not find filename from index."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Could not find filename from index."); } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/lj_speech_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/lj_speech_op.cc index 43acfbc5f2b..c81bbbbb377 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/lj_speech_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/lj_speech_op.cc @@ -38,15 +38,15 @@ LJSpeechOp::LJSpeechOp(const std::string &file_dir, int32_t num_workers, int32_t Status LJSpeechOp::PrepareData() { auto real_path = FileUtils::GetRealPath(folder_path_.data()); if (!real_path.has_value()) { - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + folder_path_); + RETURN_STATUS_UNEXPECTED("Invalid file path, LJSpeech Dataset folder: " + folder_path_ + " does not exist."); } Path root_folder(real_path.value()); Path metadata_file_path = root_folder / "metadata.csv"; CHECK_FAIL_RETURN_UNEXPECTED(metadata_file_path.Exists() && !metadata_file_path.IsDirectory(), - "Invalid file, failed to find metadata file: " + metadata_file_path.ToString()); + "Invalid file, failed to find LJSpeech metadata file: " + metadata_file_path.ToString()); std::ifstream csv_reader(metadata_file_path.ToString()); CHECK_FAIL_RETURN_UNEXPECTED(csv_reader.is_open(), - "Invalid file, failed to open metadata file: " + metadata_file_path.ToString() + + "Invalid file, failed to open LJSpeech metadata file: " + metadata_file_path.ToString() + ", make sure file not damaged or permission denied."); std::string line = ""; while (getline(csv_reader, line)) { @@ -64,8 +64,8 @@ Status LJSpeechOp::PrepareData() { } if (meta_info_list_.empty()) { csv_reader.close(); - RETURN_STATUS_UNEXPECTED( - "Reading failed, unable to read valid data from the metadata file: " + metadata_file_path.ToString() + "."); + RETURN_STATUS_UNEXPECTED("Reading failed, unable to read valid data from the LJSpeech metadata file: " + + metadata_file_path.ToString() + "."); } num_rows_ = meta_info_list_.size(); csv_reader.close(); @@ -76,7 +76,7 @@ Status LJSpeechOp::PrepareData() { // 1 function call produces 1 TensorTow Status LJSpeechOp::LoadTensorRow(row_id_type index, TensorRow *trow) { int32_t num_items = meta_info_list_.size(); - CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < num_items, "The input index is out of range."); + CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < num_items, "[Internal ERROR] The input index is out of range."); std::shared_ptr waveform; std::shared_ptr sample_rate_scalar; std::shared_ptr transcription, normalized_transcription; @@ -118,7 +118,7 @@ void LJSpeechOp::Print(std::ostream &out, bool show_all) const { Status LJSpeechOp::CountTotalRows(const std::string &dir, int64_t *count) { auto real_path = FileUtils::GetRealPath(dir.data()); if (!real_path.has_value()) { - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + dir); + RETURN_STATUS_UNEXPECTED("Invalid file, " + dir + " does not exist."); } Path root_folder(real_path.value()); Path metadata_file_path = root_folder / "metadata.csv"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc 
b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc index 25b89c2d900..fcdd44c6ae9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc @@ -63,7 +63,8 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { if (decode_ == true) { Status rc = Decode(image, &image); if (rc.IsError()) { - std::string err = "Invalid data, failed to decode image: " + data.first; + std::string err = + "Invalid image, failed to decode: " + data.first + ", the image is damaged or permission denied."; RETURN_STATUS_UNEXPECTED(err); } } @@ -91,7 +92,7 @@ void ManifestOp::Print(std::ostream &out, bool show_all) const { Status ManifestOp::GetClassIds(std::map> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) { if (image_labelname_.empty()) { - RETURN_STATUS_UNEXPECTED("Invalid data, no image found in dataset."); + RETURN_STATUS_UNEXPECTED("Invalid manifest file, image data is missing in " + file_); } else { RETURN_STATUS_UNEXPECTED( "[Internal ERROR] Map for containing image-index pair is nullptr or has been set in other place," @@ -120,13 +121,14 @@ Status ManifestOp::GetClassIds(std::map> *cls_ids) Status ManifestOp::PrepareData() { auto realpath = FileUtils::GetRealPath(file_.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_; - RETURN_STATUS_UNEXPECTED("Invalid data, get real path failed, path=" + file_); + MS_LOG(ERROR) << "Invalid file path, " << file_ << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_ + " does not exist."); } std::ifstream file_handle(realpath.value()); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Manifest file: " + file_); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file_ + + ": manifest file is damaged or permission denied!"); } std::string line; std::set classes; @@ -137,7 +139,7 @@ Status ManifestOp::PrepareData() { std::string image_file_path = js.value("source", ""); if (image_file_path == "") { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, 'source' is not found in Manifest file: " + file_ + " at line " + + RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'source' is missing in file: " + file_ + " at line " + std::to_string(line_count)); } // If image is not JPEG/PNG/GIF/BMP, drop it @@ -149,7 +151,7 @@ Status ManifestOp::PrepareData() { std::string usage = js.value("usage", ""); if (usage == "") { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, 'usage' is not found in Manifest file: " + file_ + " at line " + + RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'usage' is missing in file: " + file_ + " at line " + std::to_string(line_count)); } (void)std::transform(usage.begin(), usage.end(), usage.begin(), ::tolower); @@ -164,7 +166,7 @@ Status ManifestOp::PrepareData() { classes.insert(label_name); if (label_name == "") { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, 'name' of label is not found in Manifest file: " + file_ + + RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'name' attribute of label is missing in file: " + file_ + " at line " + std::to_string(line_count)); } if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) { @@ -180,7 +182,8 @@ Status ManifestOp::PrepareData() { line_count++; } catch (const std::exception &err) { file_handle.close(); - 
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + file_); + RETURN_STATUS_UNEXPECTED("Invalid manifest file, failed to parse " + file_ + ": " + + std::string(err.what())); } } num_classes_ = classes.size(); @@ -193,8 +196,8 @@ Status ManifestOp::PrepareData() { Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) { auto realpath = FileUtils::GetRealPath(file_name.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name; - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name); + MS_LOG(ERROR) << "Invalid file path, " << file_name << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_name + " does not exist."); } std::ifstream file_handle; @@ -202,14 +205,16 @@ Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) { *valid = false; file_handle.open(realpath.value(), std::ios::binary | std::ios::in); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid image file, failed to open " + file_name + + ": the image file is damaged or permission denied."); } unsigned char file_type[read_num]; (void)file_handle.read(reinterpret_cast(file_type), read_num); if (file_handle.fail()) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name); + RETURN_STATUS_UNEXPECTED("Invalid image file, failed to read " + file_name + + ": the image file is damaged or permission denied."); } file_handle.close(); if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc index 67972876c91..b6151a11e3a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc @@ -68,7 +68,8 @@ Status MindRecordOp::Init() { data_schema_ = std::make_unique(); std::vector col_names = shard_reader_->GetShardColumn()->GetColumnName(); - CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), "Invalid data, no column names are specified."); + CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), + "Invalid column, no column names are specified, check mindrecord file."); std::vector col_data_types = shard_reader_->GetShardColumn()->GeColumnDataType(); std::vector> col_shapes = shard_reader_->GetShardColumn()->GetColumnShape(); @@ -107,9 +108,8 @@ Status MindRecordOp::Init() { if (!load_all_cols) { std::unique_ptr tmp_schema = std::make_unique(); for (std::string colname : columns_to_load_) { - CHECK_FAIL_RETURN_UNEXPECTED( - colname_to_ind.find(colname) != colname_to_ind.end(), - "Invalid data, specified loading column name: " + colname + " does not exist in data file."); + CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(), + "Invalid column, " + colname + " does not exist in data file."); RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname]))); } data_schema_ = std::move(tmp_schema); @@ -177,7 +177,7 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) { } RETURN_IF_NOT_OK(worker_in_queues_[worker_id]->PopFront(&io_block)); } - RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Unexpected nullptr 
received in worker."); } Status MindRecordOp::GetRowFromReader(TensorRow *fetched_row, uint64_t row_id, int32_t worker_id) { @@ -231,14 +231,15 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vectorGetColumnFromJson(column_name, sample_json_, &data_ptr, &n_bytes)); } else if (category == mindrecord::ColumnInBlob) { CHECK_FAIL_RETURN_UNEXPECTED(sample_bytes_.find(column_name) != sample_bytes_.end(), - "Invalid data, failed to retrieve blob data from padding sample."); + "Invalid padded_sample, failed to retrieve blob data from padding sample, " + "check 'padded_sample'."); std::string ss(sample_bytes_[column_name]); n_bytes = ss.size(); data_ptr = std::make_unique(n_bytes); std::copy(ss.begin(), ss.end(), data_ptr.get()); } else { - RETURN_STATUS_UNEXPECTED("Invalid data, retrieved data type is unknown."); + RETURN_STATUS_UNEXPECTED("Invalid datatype, retrieved data type is unknown."); } if (data == nullptr) { data = reinterpret_cast(data_ptr.get()); @@ -254,7 +255,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector> *cls_ids) const { if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) { if (image_label_pairs_.empty()) { - RETURN_STATUS_UNEXPECTED("Invalid data, no image found in " + DatasetName() + " file."); + RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, image data is missing."); } else { RETURN_STATUS_UNEXPECTED( "[Internal ERROR] Map for containing image-index pair is nullptr or has been set in other place," @@ -89,7 +89,7 @@ Status MnistOp::ReadFromReader(std::ifstream *reader, uint32_t *result) { uint32_t res = 0; reader->read(reinterpret_cast(&res), 4); CHECK_FAIL_RETURN_UNEXPECTED(!reader->fail(), - "Invalid data, failed to read 4 bytes from " + DatasetName() + " file."); + "Invalid file, failed to read 4 bytes from " + DatasetName() + " file."); *result = SwapEndian(res); return Status::OK(); } @@ -100,17 +100,22 @@ uint32_t MnistOp::SwapEndian(uint32_t val) const { } Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_reader, uint32_t *num_images) { - CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(), - "Invalid file, failed to open " + DatasetName() + " image file: " + file_name); + CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(), "Invalid " + DatasetName() + " file, failed to open " + + file_name + " : the file is damaged or permission denied."); int64_t image_len = image_reader->seekg(0, std::ios::end).tellg(); (void)image_reader->seekg(0, std::ios::beg); // The first 16 bytes of the image file are type, number, row and column - CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name); + CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16, + "Invalid " + DatasetName() + " file, the first data length of " + file_name + + " should be 16 bytes(contains type, number, row and column), but got " + + std::to_string(image_len) + "."); uint32_t magic_number; RETURN_IF_NOT_OK(ReadFromReader(image_reader, &magic_number)); CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistImageFileMagicNumber, - "Invalid file, this is not the " + DatasetName() + " image file: " + file_name); + "Invalid " + DatasetName() + " file, the image number of " + file_name + " should be " + + std::to_string(kMnistImageFileMagicNumber) + ", but got " + + std::to_string(magic_number)); uint32_t num_items; RETURN_IF_NOT_OK(ReadFromReader(image_reader, &num_items)); @@ -120,28 +125,38 @@ Status MnistOp::CheckImage(const std::string 
&file_name, std::ifstream *image_re RETURN_IF_NOT_OK(ReadFromReader(image_reader, &cols)); // The image size of the Mnist dataset is fixed at [28,28] CHECK_FAIL_RETURN_UNEXPECTED((rows == kMnistImageRows) && (cols == kMnistImageCols), - "Invalid data, shape of image is not equal to (28, 28)."); + "Invalid " + DatasetName() + " file, shape of image in " + file_name + + " should be (28, 28), but got (" + std::to_string(rows) + ", " + std::to_string(cols) + + ")."); CHECK_FAIL_RETURN_UNEXPECTED((image_len - 16) == num_items * rows * cols, - "Invalid data, got truncated data len: " + std::to_string(image_len - 16) + - ", which is not equal to real data len: " + std::to_string(num_items * rows * cols)); + "Invalid " + DatasetName() + " file, the data length of images in " + file_name + + " should be " + std::to_string(num_items * rows * cols) + ", but got " + + std::to_string(image_len - 16)); *num_images = num_items; return Status::OK(); } Status MnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_reader, uint32_t *num_labels) { - CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), - "Invalid file, failed to open " + DatasetName() + " label file: " + file_name); + CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), "Invalid " + DatasetName() + " file, failed to open " + + file_name + ": the file is damaged or permission denied."); int64_t label_len = label_reader->seekg(0, std::ios::end).tellg(); (void)label_reader->seekg(0, std::ios::beg); // The first 8 bytes of the image file are type and number - CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name); + CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid " + DatasetName() + " file, the first data length of " + + file_name + " should be 8 bytes (contains type and number), but got " + + std::to_string(label_len) + "."); uint32_t magic_number; RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number)); CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistLabelFileMagicNumber, - "Invalid file, this is not the " + DatasetName() + " label file: " + file_name); + "Invalid " + DatasetName() + " file, the magic number of " + file_name + + " should be " + std::to_string(kMnistLabelFileMagicNumber) + ", but got " + + std::to_string(magic_number) + "."); uint32_t num_items; RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items)); - CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid data, number of labels is wrong."); + CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid " + DatasetName() + + " file, the number of labels in " + file_name + + " should be " + std::to_string(num_items) + + ", but got " + std::to_string(label_len - 8) + "."); *num_labels = num_items; return Status::OK(); } @@ -151,7 +166,10 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images)); RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels)); CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), - "Invalid data, num_images is not equal to num_labels. 
Ensure data file is not damaged."); + "Invalid " + DatasetName() + " file, the images number of " + image_names_[index] + + " should be equal to the labels number of " + label_names_[index] + + ", but got images number: " + std::to_string(num_images) + + ", labels number: " + std::to_string(num_labels) + "."); // The image size of the Mnist dataset is fixed at [28,28] int64_t size = kMnistImageRows * kMnistImageCols; auto images_buf = std::make_unique(size * num_images); @@ -163,13 +181,13 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la } (void)image_reader->read(images_buf.get(), size * num_images); if (image_reader->fail()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " image: " + image_names_[index] + - ", size:" + std::to_string(size * num_images) + ". Ensure data file is not damaged."); + RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, failed to read " + image_names_[index] + + " : the file is damaged or permission denied!"); } (void)label_reader->read(labels_buf.get(), num_images); if (label_reader->fail()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " label:" + label_names_[index] + - ", size: " + std::to_string(num_images) + ". Ensure data file is not damaged."); + RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, failed to read " + label_names_[index] + + " : the file is damaged or the file content is incomplete."); } TensorShape img_tensor_shape = TensorShape({kMnistImageRows, kMnistImageCols, 1}); for (int64_t j = 0; j != num_images; ++j) { @@ -244,8 +262,10 @@ Status MnistOp::WalkAllFiles() { std::sort(image_names_.begin(), image_names_.end()); std::sort(label_names_.begin(), label_names_.end()); - CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(), - "Invalid data, num of images is not equal to num of labels."); + CHECK_FAIL_RETURN_UNEXPECTED( + image_names_.size() == label_names_.size(), + "Invalid " + DatasetName() + " file, num of images should be equal to num of labels, but got num of images: " + + std::to_string(image_names_.size()) + ", num of labels: " + std::to_string(label_names_.size()) + "."); return Status::OK(); } @@ -279,7 +299,9 @@ Status MnistOp::CountTotalRows(const std::string &dir, const std::string &usage, uint32_t num_labels; RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels)); CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), - "Invalid data, num of images is not equal to num of labels."); + "Invalid " + op->DatasetName() + + " file, num of images should be equal to num of labels, but got num of images: " + + std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + "."); *count = *count + num_images; // Close the readers diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.cc index 5978bc9c0cf..4fa96c2ed8d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.cc @@ -215,7 +215,8 @@ bool NonMappableLeafOp::NeedPushFileToBlockQueue(const std::string &file_name, i bool push = false; int64_t start_index = device_id_ * num_rows_per_shard_; if (device_id_ + 1 < 0) { - MS_LOG(ERROR) << "Device id is invalid, got " + std::to_string(device_id_); + MS_LOG(ERROR) << "Invalid device id, device id should be greater than 
or equal 0, but got " + << std::to_string(device_id_); return false; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/photo_tour_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/photo_tour_op.cc index 4975125d8bd..bf7326bb03e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/photo_tour_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/photo_tour_op.cc @@ -141,7 +141,8 @@ Status PhotoTourOp::GetFileContent(const std::string &info_file, std::string *an RETURN_UNEXPECTED_IF_NULL(ans); std::ifstream reader; reader.open(info_file); - CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open PhotoTour info file: " + info_file); + CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open " + info_file + + ": PhotoTour info file is damaged or permission denied."); (void)reader.seekg(0, std::ios::end); std::size_t size = reader.tellg(); (void)reader.seekg(0, std::ios::beg); @@ -183,7 +184,9 @@ Status PhotoTourOp::ReadInfoFile(const std::string &data_dir, const std::string switch (col_idx) { case ID_3DPOINT: { std::string item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour info file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), + "Invalid data, reading PhotoTour info file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content should not be empty."); int id_3dpoint = std::atoi(item.c_str()); labels_.push_back(id_3dpoint); col_idx = UNKNOWN; @@ -191,7 +194,9 @@ Status PhotoTourOp::ReadInfoFile(const std::string &data_dir, const std::string } case UNKNOWN: { std::string item2 = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), "Reading PhotoTour info file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED( + !item2.empty(), "Invalid data, Reading PhotoTour info file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content in file should not be empty."); col_idx = ID_3DPOINT; break; } @@ -225,34 +230,44 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri switch (col_idx) { case PATCH_ID1: { std::string item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), + "Invalid data,Reading PhotoTour matched file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content should not be empty."); patch_id1 = std::atoi(item.c_str()); col_idx = LABEL1; break; } case LABEL1: { std::string item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), + "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content should not be empty."); label1 = std::atoi(item.c_str()); col_idx = UNUSED1; break; } case UNUSED1: { std::string item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), + "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content should not be empty."); col_idx = PATCH_ID2; break; } case PATCH_ID2: { std::string item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), 
"Reading PhotoTour matched file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), + "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content should not be empty."); patch_id2 = std::atoi(item.c_str()); col_idx = LABEL2; break; } case LABEL2: { std::string item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), + "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content should not be empty."); label2 = std::atoi(item.c_str()); col_idx = UNUSED2; matches_.push_back(std::make_tuple(patch_id1, patch_id2, uint32_t(label1 == label2))); @@ -260,13 +275,17 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri } case UNUSED2: { std::string item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), + "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content should not be empty."); col_idx = UNUSED3; break; } case UNUSED3: { std::string item2 = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), "Reading PhotoTour matched file failed: " + info_file_path); + CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), + "Invalid data, Reading PhotoTour matched file failed: " + info_file_path + + " at line: " + std::to_string(pos) + ", the content should not be empty."); col_idx = PATCH_ID1; break; } @@ -281,8 +300,9 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri Status PhotoTourOp::GetPhotoTourDataTensor(uint32_t index, std::shared_ptr *image_tensor) { RETURN_UNEXPECTED_IF_NULL(image_tensor); - CHECK_FAIL_RETURN_UNEXPECTED(index < kLens.at(name_), - "Index exceeds the maximum count of image, got: " + std::to_string(index)); + CHECK_FAIL_RETURN_UNEXPECTED( + index < kLens.at(name_), + "[Internal ERROR] Index exceeds the maximum count of image, got: " + std::to_string(index)); int image_id = index / (kPatchNumPerRow * kPatchNumPerCol); int row_in_image = (index % (kPatchNumPerRow * kPatchNumPerCol)) / kPatchNumPerRow; @@ -320,7 +340,7 @@ Status PhotoTourOp::PrepareData() { chosen_dataset_folder_path_ = (Path(dataset_dir_) / Path(name_)).ToString(); train_ = kTrain.at(usage_); auto real_folder_path = FileUtils::GetRealPath(chosen_dataset_folder_path_.data()); - CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + chosen_dataset_folder_path_); + CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), chosen_dataset_folder_path_ + " does not exist."); std::vector file_names; cv::glob(real_folder_path.value(), file_names); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/places365_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/places365_op.cc index 3359a69cb75..9d55c214978 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/places365_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/places365_op.cc @@ -107,7 +107,8 @@ Status Places365Op::GetFileContent(const std::string &info_file, std::string *an RETURN_UNEXPECTED_IF_NULL(ans); std::ifstream reader; reader.open(info_file); - CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to 
open Places365 file: " + info_file); + CHECK_FAIL_RETURN_UNEXPECTED( + !reader.fail(), "Invalid file, failed to open " + info_file + ": Places365 file is damaged or permission denied."); reader.seekg(0, std::ios::end); std::size_t size = reader.tellg(); reader.seekg(0, std::ios::beg); @@ -153,21 +154,21 @@ Status Places365Op::LoadCategories(const std::string &category_meta_name) { while ((pos = s.find(" ")) != std::string::npos) { switch (col_idx) { case CATEGORY: { - CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), - "Reading places365 category file failed: " + category_meta_name); + CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " + + category_meta_name + ", space characters not found."); category = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!category.empty(), - "Reading places365 category file failed: " + category_meta_name); + CHECK_FAIL_RETURN_UNEXPECTED(!category.empty(), "Invalid data, Reading places365 category file failed: " + + category_meta_name + ", space characters not found."); // switch the type of substring. col_idx = LABEL; break; } case LABEL: { - CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), - "Reading places365 category file failed: " + category_meta_name); + CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " + + category_meta_name + ", space characters not found."); std::string label_item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!label_item.empty(), - "Reading places365 category file failed: " + category_meta_name); + CHECK_FAIL_RETURN_UNEXPECTED(!label_item.empty(), "Invalid data, Reading places365 category file failed: " + + category_meta_name + ", space characters not found."); label = std::atoi(label_item.c_str()); // switch the type of substring. col_idx = CATEGORY; @@ -204,19 +205,21 @@ Status Places365Op::LoadFileLists(const std::string &filelists_meta_name) { while ((pos = s.find(" ")) != std::string::npos) { switch (col_idx) { case PATH: { - CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), - "Reading places365 category file failed: " + filelists_meta_name); + CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " + + filelists_meta_name + ", space characters not found."); path = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!path.empty(), "Reading places365 filelist file failed: " + filelists_meta_name); + CHECK_FAIL_RETURN_UNEXPECTED(!path.empty(), "Invalid data, Reading places365 filelist file failed: " + + filelists_meta_name + ", space characters not found."); // switch the type of substring. col_idx = LABEL; break; } case LABEL: { - CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), - "Reading places365 category file failed: " + filelists_meta_name); + CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " + + filelists_meta_name + ", space characters not found."); std::string item = get_splited_str(pos); - CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading places365 filelist file failed: " + filelists_meta_name); + CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Invalid data, Reading places365 filelist file failed: " + + filelists_meta_name + ", space characters not found."); label = std::atoi(item.c_str()); // switch the type of substring. 
col_idx = PATH; @@ -233,13 +236,15 @@ Status Places365Op::LoadFileLists(const std::string &filelists_meta_name) { Status Places365Op::GetPlaces365DataTensor(uint32_t index, std::shared_ptr *image_tensor) { std::string file_path = image_path_label_pairs_[index].first; - CHECK_FAIL_RETURN_UNEXPECTED(Path(file_path).Exists(), file_path + " File not exists."); + CHECK_FAIL_RETURN_UNEXPECTED(Path(file_path).Exists(), + "Invalid file path, Places365 image: " + file_path + " does not exist."); RETURN_IF_NOT_OK(Tensor::CreateFromFile(file_path, image_tensor)); if (decode_) { Status rc = Decode(*image_tensor, image_tensor); if (rc.IsError()) { *image_tensor = nullptr; - std::string err_msg = "Invalid data, failed to decode image: " + file_path; + std::string err_msg = + "Invalid image, failed to decode " + file_path + ": the image is damaged or permission denied."; return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); } } @@ -249,14 +254,15 @@ Status Places365Op::GetPlaces365DataTensor(uint32_t index, std::shared_ptr 0, - "Invalid data, no valid data matching the dataset API Places365Dataset. Please check file path or dataset API."); + "Invalid data, no valid data matching the dataset API Places365Dataset. Please check dataset API or file path: " + + root_ + "."); return Status::OK(); } @@ -281,7 +287,7 @@ Status Places365Op::CountTotalRows(const std::string &dir, const std::string &us for (size_t i = 0; i < op->image_path_label_pairs_.size(); ++i) { CHECK_FAIL_RETURN_UNEXPECTED(Path(op->image_path_label_pairs_[i].first).Exists(), - op->image_path_label_pairs_[i].first + " File not exists."); + "Invalid file path, " + op->image_path_label_pairs_[i].first + " does not exist."); } *count = op->image_path_label_pairs_.size(); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/qmnist_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/qmnist_op.cc index 629572aab43..f71bd08793c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/qmnist_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/qmnist_op.cc @@ -111,7 +111,9 @@ Status QMnistOp::CountTotalRows(const std::string &dir, const std::string &usage uint32_t num_labels; RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels)); CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), - "Invalid data, num of images is not equal to num of labels."); + "Invalid data, num of images should be equal to num of labels loading from " + dir + + ", but got num of images: " + std::to_string(num_images) + + ", num of labels: " + std::to_string(num_labels) + "."); if (usage == "test10k") { // only use the first 10k samples and drop the last 50k samples @@ -141,7 +143,8 @@ Status QMnistOp::WalkAllFiles() { const std::string nist_prefix = "xnist"; auto real_folder_path = FileUtils::GetRealPath(folder_path_.data()); - CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + folder_path_); + CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), + "Invalid QMnist folder, " + folder_path_ + " does not exist or permission denied!"); Path root_dir(real_folder_path.value()); if (usage_ == "train") { @@ -162,20 +165,25 @@ Status QMnistOp::WalkAllFiles() { label_names_.push_back((root_dir / Path(nist_prefix + "-" + label_ext)).ToString()); } - CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(), - "Invalid data, num of images is not equal to num of labels."); + 
CHECK_FAIL_RETURN_UNEXPECTED( + image_names_.size() == label_names_.size(), + "Invalid data, num of Qmnist image files should be equal to num of Qmnist label files under directory:" + + folder_path_ + ", but got num of image files: " + std::to_string(image_names_.size()) + + ", num of label files: " + std::to_string(label_names_.size()) + "."); for (size_t i = 0; i < image_names_.size(); i++) { Path file_path(image_names_[i]); - CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), - "Failed to find " + DatasetName() + " image file: " + file_path.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED( + file_path.Exists() && !file_path.IsDirectory(), + "Invalid file path, Qmnist data file: " + file_path.ToString() + " does not exist or is a directory."); MS_LOG(INFO) << DatasetName(true) << " operator found image file at " << file_path.ToString() << "."; } for (size_t i = 0; i < label_names_.size(); i++) { Path file_path(label_names_[i]); - CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(), - "Failed to find " + DatasetName() + " label file: " + file_path.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED( + file_path.Exists() && !file_path.IsDirectory(), + "Invalid file path, Qmnist data file: " + file_path.ToString() + " does not exist or is a directory."); MS_LOG(INFO) << DatasetName(true) << " operator found label file at " << file_path.ToString() << "."; } @@ -189,7 +197,9 @@ Status QMnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *l RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images)); RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels)); CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels), - "Invalid data, num_images is not equal to num_labels. Ensure data file is not damaged."); + "Invalid data, num of images should be equal to num of labels loading from " + + folder_path_ + ", but got num of images: " + std::to_string(num_images) + + ", num of labels: " + std::to_string(num_labels) + "."); // The image size of the QMNIST dataset is fixed at [28,28] int64_t image_size = kQMnistImageRows * kQMnistImageCols; @@ -216,16 +226,16 @@ Status QMnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *l } (void)image_reader->read(images_buf.get(), image_size * num_images); if (image_reader->fail()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " image: " + image_names_[index] + - ", size:" + std::to_string(image_size * num_images) + - ". Ensure data file is not damaged."); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + std::to_string(image_size * num_images) + + " bytes from " + image_names_[index] + + ": the data file is damaged or the content is incomplete."); } // uint32_t use 4 bytes in memory (void)label_reader->read(reinterpret_cast(labels_buf.get()), label_length * num_labels * 4); if (label_reader->fail()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " label:" + label_names_[index] + - ", size: " + std::to_string(label_length * num_labels) + - ". 
Ensure data file is not damaged."); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + std::to_string(label_length * num_labels * 4) + + " bytes from " + label_names_[index] + + ": the data file is damaged or content is incomplete."); } TensorShape image_tensor_shape = TensorShape({kQMnistImageRows, kQMnistImageCols, 1}); TensorShape label_tensor_shape = TensorShape({kQMnistLabelLength}); @@ -258,23 +268,32 @@ Status QMnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_r RETURN_UNEXPECTED_IF_NULL(label_reader); RETURN_UNEXPECTED_IF_NULL(num_labels); CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), - "Invalid file, failed to open " + DatasetName() + " label file: " + file_name); + "Invalid file, failed to open " + file_name + ": the label file is damaged or permission denied."); int64_t label_len = label_reader->seekg(0, std::ios::end).tellg(); (void)label_reader->seekg(0, std::ios::beg); // The first 12 bytes of the label file are type, number and length - CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 12, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name); + CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 12, + "Invalid file, load " + file_name + + " failed: the first 12 bytes of the label file should be type, number and length, " + + "but got the first read bytes: " + std::to_string(label_len)); uint32_t magic_number; RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number)); CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kQMnistLabelFileMagicNumber, - "Invalid file, this is not the " + DatasetName() + " label file: " + file_name); + "Invalid label file, the magic number of " + file_name + " should be " + + std::to_string(kQMnistLabelFileMagicNumber) + ", but got " + + std::to_string(magic_number) + "."); uint32_t num_items; RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items)); uint32_t length; RETURN_IF_NOT_OK(ReadFromReader(label_reader, &length)); - CHECK_FAIL_RETURN_UNEXPECTED(length == kQMnistLabelLength, "Invalid data, length of labels is not equal to 8."); + CHECK_FAIL_RETURN_UNEXPECTED(length == kQMnistLabelLength, "Invalid data, length of every label loading from " + + file_name + " should be equal to 8, but got " + + std::to_string(length) + "."); CHECK_FAIL_RETURN_UNEXPECTED((label_len - 12) == num_items * kQMnistLabelLength * 4, - "Invalid data, number of labels is wrong."); + "Invalid data, the total bytes of labels loading from QMnist label file: " + file_name + + " should be " + std::to_string(num_items * kQMnistLabelLength * 4) + ", but got " + + std::to_string(label_len - 12) + "."); *num_labels = num_items; return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc index 0e879b79d60..8a2e79b7764 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc @@ -98,7 +98,7 @@ void RandomDataOp::GenerateSchema() { std::make_unique(col_name, DataType(newType), TensorImpl::kFlexible, rank, new_shape.get()); Status rc = data_schema_->AddColumn(*new_col); - if (rc.IsError()) MS_LOG(ERROR) << "Failed to generate a schema. Message:" << rc; + if (rc.IsError()) MS_LOG(ERROR) << "[Internal ERROR] Failed to generate a schema. 
Message:" << rc; } } @@ -136,7 +136,8 @@ Status RandomDataOp::CreateRandomRow(TensorRow *new_row) { buf = std::make_unique(size_in_bytes); int ret_code = memset_s(buf.get(), size_in_bytes, random_byte, size_in_bytes); if (ret_code != 0) { - return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor."); + return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, + "[Internal ERROR] memset_s failed to set random bytes for a tensor."); } RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc index c2e7ca21540..874fa490c6a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.cc @@ -53,7 +53,7 @@ Status DistributedSamplerRT::InitSampler() { CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "Invalid parameter, num_samples must be greater than 0, but got " + std::to_string(num_samples_) + ".\n"); CHECK_FAIL_RETURN_UNEXPECTED( - num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n"); + num_rows_ > 0, "[Internal ERROR] num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n"); CHECK_FAIL_RETURN_UNEXPECTED( device_id_ < num_devices_ && device_id_ >= 0 && num_rows_ > 0 && num_samples_ > 0, "Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " + @@ -96,7 +96,7 @@ Status DistributedSamplerRT::GetNextSample(TensorRow *out) { RETURN_UNEXPECTED_IF_NULL(out); if (cnt_ > samples_per_tensor_) { RETURN_STATUS_UNEXPECTED( - "Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" + + "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" + std::to_string(cnt_) + ", samples_per_tensor(num_samples): " + std::to_string(samples_per_tensor_)); } else if (cnt_ == samples_per_tensor_ && (non_empty_ || !even_dist_)) { (*out) = TensorRow(TensorRow::kFlagEOE); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/mind_record_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/mind_record_sampler.cc index 0c8faab28d3..1f19e194f63 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/mind_record_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/mind_record_sampler.cc @@ -29,7 +29,7 @@ Status MindRecordSamplerRT::GetNextSample(TensorRow *out) { RETURN_UNEXPECTED_IF_NULL(out); if (next_id_ > num_samples_) { RETURN_STATUS_UNEXPECTED( - "Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " + + "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " + std::to_string(next_id_) + ", num_samples_: " + std::to_string(num_samples_)); } else if (next_id_ == num_samples_) { (*out) = TensorRow(TensorRow::kFlagEOE); @@ -53,8 +53,8 @@ Status MindRecordSamplerRT::InitSampler() { if (!sample_ids_) { // Note, sample_ids_.empty() is okay and will just give no sample ids. 
RETURN_STATUS_UNEXPECTED( - "Init Sampler failed as sample_ids is empty, here ShardReader did not provide a valid sample ids vector via" - " MindRecordSamplerRT"); + "[Internal ERROR]Init Sampler failed as sample_ids is empty, here ShardReader did not provide a valid sample ids " + "vector via MindRecordSamplerRT."); } // Usually, the num samples is given from the user interface. In our case, that data is in mindrecord. diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc index 53b6066f3db..cc593cc047e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/pk_sampler.cc @@ -44,7 +44,7 @@ Status PKSamplerRT::InitSampler() { // Compute that here for this case to find the total number of samples that are available to return. // (in this case, samples per class * total classes). if (samples_per_class_ > std::numeric_limits::max() / static_cast(labels_.size())) { - RETURN_STATUS_UNEXPECTED("Overflow in counting num_rows"); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Overflow in counting num_rows"); } num_rows_ = samples_per_class_ * static_cast(labels_.size()); @@ -72,7 +72,7 @@ Status PKSamplerRT::GetNextSample(TensorRow *out) { RETURN_UNEXPECTED_IF_NULL(out); if (next_id_ > num_samples_ || num_samples_ == 0) { RETURN_STATUS_UNEXPECTED( - "Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " + + "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " + std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_)); } else if (next_id_ == num_samples_) { (*out) = TensorRow(TensorRow::kFlagEOE); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc index 889c0cc3022..bf9bc313294 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/python_sampler.cc @@ -39,7 +39,7 @@ Status PythonSamplerRT::GetNextSample(TensorRow *out) { { py::gil_scoped_acquire gil_acquire; if (Py_IsInitialized() == 0) { - return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); + return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); } try { py::object py_ret = py_sampler_instance.attr("_get_indices")(); @@ -57,7 +57,8 @@ Status PythonSamplerRT::GetNextSample(TensorRow *out) { return Status(StatusCode::kMDPyFuncException, e.what()); } catch (const py::cast_error &e) { return Status(StatusCode::kMDPyFuncException, - "Invalid data, python sampler iterator should return an integer index."); + "Invalid data, Python sampler iterator should return an integer index, but error raised: " + + std::string(e.what())); } } (*out) = {sample_ids}; @@ -71,7 +72,7 @@ Status PythonSamplerRT::InitSampler() { return Status::OK(); } CHECK_FAIL_RETURN_UNEXPECTED( - num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_)); + num_rows_ > 0, "[Internal ERROR] num_rows must be greater than 0, but got " + std::to_string(num_rows_)); // Special value of 0 for num_samples means that the user wants to sample the entire set of data. 
// If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly. if (num_samples_ == 0 || num_samples_ > num_rows_) { @@ -80,12 +81,13 @@ Status PythonSamplerRT::InitSampler() { { py::gil_scoped_acquire gil_acquire; if (Py_IsInitialized() == 0) { - return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); + return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); } try { py_sampler_instance.attr("_handshake")(num_rows_, num_samples_); } catch (const py::error_already_set &e) { - return Status(StatusCode::kMDPyFuncException, e.what()); + return Status(StatusCode::kMDPyFuncException, + "[Internal ERROR] python sampler execute _handshake failed: " + std::string(e.what())); } } @@ -98,7 +100,7 @@ Status PythonSamplerRT::ResetSampler() { need_to_reset_ = false; py::gil_scoped_acquire gil_acquire; if (Py_IsInitialized() == 0) { - return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); + return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); } try { py_sampler_instance.attr("reset")(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc index d0233bd550a..6c94e44ea60 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/random_sampler.cc @@ -36,8 +36,9 @@ RandomSamplerRT::RandomSamplerRT(bool replacement, int64_t num_samples, bool res Status RandomSamplerRT::GetNextSample(TensorRow *out) { RETURN_UNEXPECTED_IF_NULL(out); if (next_id_ > num_samples_) { - RETURN_STATUS_UNEXPECTED("Sampler index must be less than or equal to num_samples(total rows in dataset), but got" + - std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_)); + RETURN_STATUS_UNEXPECTED( + "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got" + + std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_)); } else if (next_id_ == num_samples_) { (*out) = TensorRow(TensorRow::kFlagEOE); } else { @@ -81,7 +82,7 @@ Status RandomSamplerRT::InitSampler() { } CHECK_FAIL_RETURN_UNEXPECTED( num_samples_ > 0 && num_rows_ > 0, - "Invalid parameter, num_samples and num_rows must be greater than 0, but got num_samples: " + + "[Internal ERROR] num_samples and num_rows must be greater than 0, but got num_samples: " + std::to_string(num_samples_) + ", num_rows: " + std::to_string(num_rows_)); samples_per_tensor_ = samples_per_tensor_ > num_samples_ ? num_samples_ : samples_per_tensor_; rnd_.seed(seed_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc index a363504ff4c..4a6cefd9651 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc @@ -28,7 +28,7 @@ Status RandomAccessOp::GetNumRowsInDataset(int64_t *num) const { // Here, it is just a getter method to return the value. However, it is invalid if there is // not a value set for this count, so generate a failure if that is the case. 
if (num == nullptr || num_rows_ == -1) { - RETURN_STATUS_UNEXPECTED("Get num rows in Dataset failed, num_rows has not been set yet."); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Get num rows in Dataset failed, num_rows has not been set yet."); } (*num) = num_rows_; return Status::OK(); @@ -55,7 +55,7 @@ Status SamplerRT::HandshakeRandomAccessOp(const RandomAccessOp *op) { RETURN_IF_NOT_OK(child_sampler->HandshakeRandomAccessOp(op)); } - CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "RandomAccessOp init failed, as it is nullptr."); + CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "[Internal ERROR] RandomAccessOp init failed, as it is nullptr."); // If there's a child sampler, set the row count to be it's sample count if (HasChildSampler()) { @@ -114,7 +114,7 @@ Status SamplerRT::GetAllIdsThenReset(py::array *data) { { py::gil_scoped_acquire gil_acquire; if (Py_IsInitialized() == 0) { - return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized"); + return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized"); } try { RETURN_IF_NOT_OK(sample_ids->GetDataAsNumpy(data)); @@ -127,7 +127,9 @@ Status SamplerRT::GetAllIdsThenReset(py::array *data) { #endif Status SamplerRT::SetNumSamples(int64_t num_samples) { - CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "Invalid parameter, num_samples must be greater than or equal to 0."); + CHECK_FAIL_RETURN_UNEXPECTED( + num_samples >= 0, + "Invalid parameter, 'num_samples' must be greater than or equal to 0, but got " + std::to_string(num_samples)); num_samples_ = num_samples; return Status::OK(); } @@ -161,13 +163,13 @@ Status SamplerRT::AddChild(std::shared_ptr child) { // Only samplers can be added, not any other DatasetOp. std::shared_ptr sampler = std::dynamic_pointer_cast(child); if (!sampler) { - std::string err_msg("Cannot add child, child is not a sampler object."); + std::string err_msg("[Internal ERROR] Cannot add child, child is not a sampler object."); RETURN_STATUS_UNEXPECTED(err_msg); } // Samplers can have at most 1 child. 
if (!child_.empty()) { - std::string err_msg("Cannot add child sampler, this sampler already has a child."); + std::string err_msg("[Internal ERROR] Cannot add child sampler, this sampler already has a child."); RETURN_STATUS_UNEXPECTED(err_msg); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h index 5294f1fe1a1..be485a422bb 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.h @@ -42,7 +42,7 @@ class RandomAccessOp { // @param std::map> * map // @return Status The status code returned virtual Status GetClassIds(std::map> *map) const { - RETURN_STATUS_UNEXPECTED("GetClassIds needs to be override to support PK"); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] GetClassIds needs to be overridden to support PK."); } // default destructor diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc index 8ee0d2f6537..e9d56ace92c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.cc @@ -29,7 +29,7 @@ Status SequentialSamplerRT::GetNextSample(TensorRow *out) { RETURN_UNEXPECTED_IF_NULL(out); if (id_count_ > num_samples_) { RETURN_STATUS_UNEXPECTED( - "Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" + + "[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" + std::to_string(id_count_) + ", num_samples_: " + std::to_string(num_samples_)); } else if (id_count_ == num_samples_) { (*out) = TensorRow(TensorRow::kFlagEOE); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc index 378f2ce7b91..caec796643f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc @@ -41,10 +41,9 @@ Status WeightedRandomSamplerRT::InitSampler() { if (num_samples_ == 0 || num_samples_ > num_rows_) { num_samples_ = num_rows_; } - CHECK_FAIL_RETURN_UNEXPECTED( - num_rows_ > 0 && num_samples_, - "Invalid parameter, num_samples and num_rows must be greater than 0, but got num_rows: " + - std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_)); + CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0 && num_samples_, + "[Internal ERROR] num_samples and num_rows must be greater than 0, but got num_rows: " + + std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_)); CHECK_FAIL_RETURN_UNEXPECTED(samples_per_tensor_ > 0, "Invalid parameter, samples_per_tensor(num_samples) must be greater than 0, but got " + std::to_string(samples_per_tensor_) + ".\n"); @@ -160,8 +159,9 @@ Status WeightedRandomSamplerRT::GetNextSample(TensorRow *out) { } if (genId >= num_rows_) { - RETURN_STATUS_UNEXPECTED("Generated indice is out of bound, expect range [0, num_data-1], got indice: " + - std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1)); + RETURN_STATUS_UNEXPECTED( + "[Internal ERROR] Generated indice is out of bound, 
expect range [0, num_data-1], got indice: " + + std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1)); } if (HasChildSampler()) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sbu_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sbu_op.cc index 12e684b5789..56353321ec4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sbu_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sbu_op.cc @@ -76,7 +76,8 @@ Status SBUOp::ReadImageToTensor(const std::string &path, std::shared_ptr if (decode_ == true) { Status rc = Decode(*tensor, tensor); if (rc.IsError()) { - RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path); + RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode image:" + path + + ", the image is damaged or permission denied."); } } return Status::OK(); @@ -129,18 +130,21 @@ Status SBUOp::PrepareData() { Path root_dir(real_folder_path.value()); url_path_ = root_dir / url_file_name; - CHECK_FAIL_RETURN_UNEXPECTED(url_path_.Exists() && !url_path_.IsDirectory(), - "Invalid file, failed to find SBU url file: " + url_path_.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED( + url_path_.Exists() && !url_path_.IsDirectory(), + "Invalid file, SBU url file: " + url_path_.ToString() + " does not exist or is a directory."); MS_LOG(INFO) << "SBU operator found url file " << url_path_.ToString() << "."; caption_path_ = root_dir / caption_file_name; - CHECK_FAIL_RETURN_UNEXPECTED(caption_path_.Exists() && !caption_path_.IsDirectory(), - "Invalid file, failed to find SBU caption file: " + caption_path_.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED( + caption_path_.Exists() && !caption_path_.IsDirectory(), + "Invalid file, SBU caption file: " + caption_path_.ToString() + " does not exist or is a directory."); MS_LOG(INFO) << "SBU operator found caption file " << caption_path_.ToString() << "."; image_folder_ = root_dir / image_folder_name; - CHECK_FAIL_RETURN_UNEXPECTED(image_folder_.Exists() && image_folder_.IsDirectory(), - "Invalid folder, failed to find SBU image folder: " + image_folder_.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED( + image_folder_.Exists() && image_folder_.IsDirectory(), + "Invalid folder, SBU image folder:" + image_folder_.ToString() + " does not exist or is not a directory."); MS_LOG(INFO) << "SBU operator found image folder " << image_folder_.ToString() << "."; std::ifstream url_file_reader; @@ -149,10 +153,11 @@ Status SBUOp::PrepareData() { url_file_reader.open(url_path_.ToString(), std::ios::in); caption_file_reader.open(caption_path_.ToString(), std::ios::in); - CHECK_FAIL_RETURN_UNEXPECTED(url_file_reader.is_open(), - "Invalid file, failed to open SBU url file: " + url_path_.ToString()); - CHECK_FAIL_RETURN_UNEXPECTED(caption_file_reader.is_open(), - "Invalid file, failed to open SBU caption file: " + caption_path_.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED(url_file_reader.is_open(), "Invalid file, failed to open " + url_path_.ToString() + + ": the SBU url file is permission denied."); + CHECK_FAIL_RETURN_UNEXPECTED( + caption_file_reader.is_open(), + "Invalid file, failed to open " + caption_path_.ToString() + ": the SBU caption file is permission denied."); Status rc = GetAvailablePairs(url_file_reader, caption_file_reader); url_file_reader.close(); @@ -172,8 +177,8 @@ Status SBUOp::GetAvailablePairs(std::ifstream &url_file_reader, std::ifstream &c while (std::getline(url_file_reader, url_line) && std::getline(caption_file_reader, caption_line)) { 
CHECK_FAIL_RETURN_UNEXPECTED( (url_line.empty() && caption_line.empty()) || (!url_line.empty() && !caption_line.empty()), - "Invalid data, SBU url and caption file are mismatched: " + url_path_.ToString() + " and " + - caption_path_.ToString()); + "Invalid data, SBU url: " + url_path_.ToString() + " and caption file: " + caption_path_.ToString() + + " load empty data at line: " + std::to_string(line_num) + "."); if (!url_line.empty() && !caption_line.empty()) { line_num++; RETURN_IF_NOT_OK(this->ParsePair(url_line, caption_line)); @@ -182,7 +187,8 @@ Status SBUOp::GetAvailablePairs(std::ifstream &url_file_reader, std::ifstream &c image_caption_pairs_.shrink_to_fit(); - CHECK_FAIL_RETURN_UNEXPECTED(image_caption_pairs_.size() > 0, "No valid images in " + image_folder_.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED(image_caption_pairs_.size() > 0, + "Invalid data, no valid images in " + image_folder_.ToString() + ", check SBU dataset."); // base field of RandomAccessOp num_rows_ = image_caption_pairs_.size(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc index a0fd0d0d9ed..7d2a36f2f70 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc @@ -80,13 +80,14 @@ Status TextFileOp::LoadTensor(const std::string &line, TensorRow *out_row) { Status TextFileOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) { auto realpath = FileUtils::GetRealPath(file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " get real path failed, path=" << file; - RETURN_STATUS_UNEXPECTED("Invalid file, " + DatasetName() + " get real path failed, path=" + file); + MS_LOG(ERROR) << "Invalid file path, " << file << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + file + " does not exist."); } std::ifstream handle(realpath.value()); if (!handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + file); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open text:" + file + + ", the file is damaged or permission denied."); } int64_t rows_total = 0; @@ -170,13 +171,13 @@ Status TextFileOp::FillIOBlockQueue(const std::vector &i_keys) { int64_t CountTotalRows(const std::string &file) { auto realpath = FileUtils::GetRealPath(file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file; + MS_LOG(ERROR) << "Invalid file, " << file << " does not exist."; return 0; } std::ifstream handle(realpath.value()); if (!handle.is_open()) { - MS_LOG(ERROR) << "Invalid file, failed to open file: " << file; + MS_LOG(ERROR) << "Invalid file, failed to open text file:" << file << ", the file is damaged or permission denied."; return 0; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc index e1358ff909a..dfd958131e0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc @@ -44,7 +44,7 @@ const int64_t kTFRecordFileLimit = 0x140000000; bool TFReaderOp::ValidateFirstRowCrc(const std::string &filename) { auto realpath = FileUtils::GetRealPath(filename.data()); if (!realpath.has_value()) { - 
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filename; + MS_LOG(ERROR) << "Invalid file path, " << filename << " does not exist."; return false; } @@ -126,7 +126,7 @@ Status TFReaderOp::Init() { } if (total_rows_ < 0) { RETURN_STATUS_UNEXPECTED( - "Invalid parameter, num_samples or num_rows for TFRecordDataset must be greater than 0, but got: " + + "[Internal ERROR] num_samples or num_rows for TFRecordDataset must be greater than 0, but got: " + std::to_string(total_rows_)); } @@ -267,14 +267,14 @@ Status TFReaderOp::FillIOBlockNoShuffle() { Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, int64_t end_offset, int32_t worker_id) { auto realpath = FileUtils::GetRealPath(filename.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filename; - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + filename); + MS_LOG(ERROR) << "Invalid file path, " << filename << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + filename + " does not exist."); } std::ifstream reader; reader.open(realpath.value()); if (!reader) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + filename); + RETURN_STATUS_UNEXPECTED("Invalid file, " + filename + " open failed: permission denied!"); } int64_t rows_read = 0; @@ -304,7 +304,7 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i if (start_offset == kInvalidOffset || (rows_total >= start_offset && rows_total < end_offset)) { dataengine::Example tf_file; if (!tf_file.ParseFromString(serialized_example)) { - std::string errMsg = "Invalid file, failed to parse tfrecord file : " + filename; + std::string errMsg = "Failed to parse tfrecord file: " + filename + ", make sure protobuf version is suitable."; MS_LOG(DEBUG) << errMsg + ", details of string: " << serialized_example; RETURN_STATUS_UNEXPECTED(errMsg); } @@ -333,7 +333,8 @@ Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *ou const google::protobuf::Map &feature_map = example_features.feature(); auto iter_column = feature_map.find(current_col.Name()); if (iter_column == feature_map.end()) { - RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist."); + RETURN_STATUS_UNEXPECTED("Invalid columns_list, column name: " + current_col.Name() + + " does not exist in tfrecord file, check tfrecord files."); } const dataengine::Feature &column_values_list = iter_column->second; RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col)); @@ -383,11 +384,13 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature break; } case dataengine::Feature::KindCase::KIND_NOT_SET: { - std::string err_msg = "Invalid data, column type in tf record file must be uint8, int64 or float32."; + std::string err_msg = + "Unrecognized datatype, column type in tfrecord file must be uint8, int64 or float32, check tfrecord file."; RETURN_STATUS_UNEXPECTED(err_msg); } default: { - std::string err_msg = "Invalid data, column type in tf record file must be uint8, int64 or float32."; + std::string err_msg = + "Unrecognized datatype, column type in tfrecord file must be uint8, int64 or float32, check tfrecord file."; RETURN_STATUS_UNEXPECTED(err_msg); } } @@ -404,8 +407,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng // Must be single byte type for each element! 
if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 && current_col.Type() != DataType::DE_STRING) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + - ", data type should be int8, uint8 or string, but got " + current_col.Type().ToString(); + std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + + " should be int8, uint8 or string, but got " + current_col.Type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -439,7 +442,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng for (int i = 1; i < cur_shape.Size(); ++i) { if (cur_shape[i] == TensorShape::kDimUnknown) { std::string err_msg = - "Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name(); + "Invalid data dimension, only one unknown dimension (-1) is supported, but the 0th and the " + + std::to_string(i) + "th dimension shape of " + current_col.Name() + " are both -1."; RETURN_STATUS_UNEXPECTED(err_msg); } new_pad_size *= cur_shape[i]; @@ -447,10 +451,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng pad_size = new_pad_size; } else { if (cur_shape.known() && cur_shape.NumOfElements() != max_size) { - std::string err_msg = "Invalid data, shape in schema's column '" + current_col.Name() + "' is incorrect." + - "\nshape received: " + cur_shape.ToString() + - "\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) + - "\nexpected total elements in shape: " + std::to_string(max_size); + std::string err_msg = "Data dimensions of '" + current_col.Name() + + "' do not match, the expected total elements of shape " + cur_shape.ToString() + + " should be " + std::to_string(max_size) + ", but got " + + std::to_string(cur_shape.NumOfElements()); RETURN_STATUS_UNEXPECTED(err_msg); } } @@ -469,8 +473,8 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor &current_col, const dataeng // KFloatList can only map to DE types: // DE_FLOAT32 if (current_col.Type() != DataType::DE_FLOAT32) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + - ", data type should be string, but got " + current_col.Type().ToString(); + std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + + " should be float32, but got " + current_col.Type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -507,9 +511,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor &current_col, const dat } else if (current_col.Type() == DataType::DE_INT8) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); } else { - std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.Name() + - ", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" + - ", but got " + current_col.Type().ToString(); + std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + + " should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8, but got " + + current_col.Type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -522,8 +526,8 @@ template Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list, int32_t *num_elements, std::shared_ptr *tensor) { if (!(current_col.Type().IsInt())) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + - ", data type should be int, 
but got " + current_col.Type().ToString(); + std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + " should be int, but got " + + current_col.Type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -551,8 +555,8 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector columns_to_load) { auto realpath = FileUtils::GetRealPath(tf_file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << tf_file; - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + tf_file); + MS_LOG(ERROR) << "Invalid file path, " << tf_file << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + tf_file + " does not exist."); } std::ifstream reader; @@ -572,7 +576,8 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vectorfirst; @@ -609,10 +614,12 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector> async_results; if (threads <= 0) { - RETURN_STATUS_UNEXPECTED("Invalid data, the threads of TFReader should be greater than zero, but got zero."); + RETURN_STATUS_UNEXPECTED( + "Invalid threads number, the threads number of TFReader should be greater than zero, but got " + + std::to_string(threads) + "."); } int64_t chunk_size = filenames.size() / threads; int64_t remainder = filenames.size() % threads; @@ -672,7 +681,7 @@ Status TFReaderOp::CountTotalRows(int64_t *out_total_rows, const std::vector &file for (int i = begin; i < end; i++) { auto realpath = FileUtils::GetRealPath(filenames[i].data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filenames[i]; + MS_LOG(ERROR) << "Invalid file path, " << filenames[i] << " does not exist."; continue; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/usps_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/usps_op.cc index 9e8b477240d..8218d1aa876 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/usps_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/usps_op.cc @@ -107,7 +107,7 @@ int64_t USPSOp::CountRows(const std::string &data_file) { std::ifstream data_file_reader; data_file_reader.open(data_file, std::ios::in); if (!data_file_reader.is_open()) { - MS_LOG(ERROR) << "Invalid file, failed to open file: " << data_file; + MS_LOG(ERROR) << "Invalid file, failed to open " << data_file << ": the file is permission denied."; return 0; } @@ -124,7 +124,8 @@ int64_t USPSOp::CountRows(const std::string &data_file) { Status USPSOp::GetFiles() { auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data()); - CHECK_FAIL_RETURN_UNEXPECTED(real_dataset_dir.has_value(), "Get real path failed: " + dataset_dir_); + CHECK_FAIL_RETURN_UNEXPECTED(real_dataset_dir.has_value(), + "Invalid file path, USPS dataset dir: " + dataset_dir_ + " does not exist."); Path root_dir(real_dataset_dir.value()); const Path train_file_name("usps"); @@ -144,16 +145,18 @@ Status USPSOp::GetFiles() { if (use_train) { Path train_path = root_dir / train_file_name; - CHECK_FAIL_RETURN_UNEXPECTED(train_path.Exists() && !train_path.IsDirectory(), - "Invalid file, failed to find USPS train data file: " + train_path.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED( + train_path.Exists() && !train_path.IsDirectory(), + "Invalid file, USPS dataset train file: " + train_path.ToString() + " does not exist or is a directory."); 
data_files_list_.emplace_back(train_path.ToString()); MS_LOG(INFO) << "USPS operator found train data file " << train_path.ToString() << "."; } if (use_test) { Path test_path = root_dir / test_file_name; - CHECK_FAIL_RETURN_UNEXPECTED(test_path.Exists() && !test_path.IsDirectory(), - "Invalid file, failed to find USPS test data file: " + test_path.ToString()); + CHECK_FAIL_RETURN_UNEXPECTED( + test_path.Exists() && !test_path.IsDirectory(), + "Invalid file, USPS dataset test file: " + test_path.ToString() + " does not exist or is a directory."); data_files_list_.emplace_back(test_path.ToString()); MS_LOG(INFO) << "USPS operator found test data file " << test_path.ToString() << "."; } @@ -163,7 +166,8 @@ Status USPSOp::GetFiles() { Status USPSOp::LoadFile(const std::string &data_file, int64_t start_offset, int64_t end_offset, int32_t worker_id) { std::ifstream data_file_reader(data_file); if (!data_file_reader.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + data_file); + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open USPS dataset file: " + data_file + + ", the file is permission denied."); } int64_t rows_total = 0; @@ -210,8 +214,8 @@ Status USPSOp::LoadTensor(std::string *line, TensorRow *trow) { auto images_buffer = std::make_unique(kUSPSImageSize); auto labels_buffer = std::make_unique(1); if (images_buffer == nullptr || labels_buffer == nullptr) { - MS_LOG(ERROR) << "Failed to allocate memory for USPS buffer."; - RETURN_STATUS_UNEXPECTED("Failed to allocate memory for USPS buffer."); + MS_LOG(ERROR) << "[Internal ERROR] Failed to allocate memory for USPS buffer."; + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to allocate memory for USPS buffer."); } RETURN_IF_NOT_OK(this->ParseLine(line, images_buffer, labels_buffer)); @@ -245,10 +249,12 @@ Status USPSOp::ParseLine(std::string *line, const std::unique_ptrerase(0, pos + 1); } - CHECK_FAIL_RETURN_UNEXPECTED(split_num == (kUSPSImageSize + 1), "Invalid data, USPS data file is corrupted."); + CHECK_FAIL_RETURN_UNEXPECTED(split_num == (kUSPSImageSize + 1), + "Invalid data, the number of split characters ':' in USPS data file is corrupted, " + "should be " + + std::to_string(kUSPSImageSize + 1) + ", but got " + std::to_string(split_num) + "."); return Status::OK(); } @@ -274,7 +283,7 @@ Status USPSOp::CalculateNumRowsPerShard() { } std::string file_list = ss.str(); RETURN_STATUS_UNEXPECTED( - "Invalid data, USPSDataset API can't read the data file (interface mismatch or no data found). " + "Invalid data, 'USPSDataset' API can't read the data file (interface mismatch or no data found). 
" "Check file: " + file_list); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc index 9962de17326..e8c2a50c23c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc @@ -118,14 +118,15 @@ Status VOCOp::ParseImageIds() { auto realpath = FileUtils::GetRealPath(image_sets_file.data()); if (!realpath.has_value()) { - MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << image_sets_file; - RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + image_sets_file); + MS_LOG(ERROR) << "Invalid file path, " << image_sets_file << " does not exist."; + RETURN_STATUS_UNEXPECTED("Invalid file path, " + image_sets_file + " does not exist."); } std::ifstream in_file; in_file.open(realpath.value()); if (in_file.fail()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + image_sets_file); + RETURN_STATUS_UNEXPECTED("Invalid ImageSets file, failed to open ImageSets file: " + image_sets_file + + ", the file is damaged or permission denied."); } std::string id; while (getline(in_file, id)) { @@ -187,28 +188,30 @@ Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float std::string invalid_bbox = "{" + std::to_string(static_cast(xmin)) + ", " + std::to_string(static_cast(ymin)) + ", " + std::to_string(static_cast(xmax)) + ", " + std::to_string(static_cast(ymax)) + "}"; - RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path); + RETURN_STATUS_UNEXPECTED("Invalid bndbox, the coordinate of bndbox in " + path + + " should be greater than 0, but got " + invalid_bbox); } return Status::OK(); } Status VOCOp::ParseAnnotationBbox(const std::string &path) { if (!Path(path).Exists()) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path); + RETURN_STATUS_UNEXPECTED("Invalid file path, " + path + " does not exist."); } Annotation annotation; XMLDocument doc; XMLError e = doc.LoadFile(common::SafeCStr(path)); if (e != XMLError::XML_SUCCESS) { - RETURN_STATUS_UNEXPECTED("Invalid file, failed to load xml file: " + path); + RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load " + path + ": the xml file is damaged or incorrect format."); } XMLElement *root = doc.RootElement(); if (root == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid data, failed to load root element for xml file."); + RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load root element of " + path + + ": the format of xml file is incorrect."); } XMLElement *object = root->FirstChildElement("object"); if (object == nullptr) { - RETURN_STATUS_UNEXPECTED("Invalid data, no object found in " + path); + RETURN_STATUS_UNEXPECTED("Invalid xml, the node of object is missing in " + path + "."); } while (object != nullptr) { std::string label_name; @@ -226,7 +229,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) { ParseNodeValue(bbox_node, "ymax", &ymax); RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path)); } else { - RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path); + RETURN_STATUS_UNEXPECTED("Invalid xml, the node of bndbox is missing in " + path); } if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 && @@ -254,7 +257,8 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co if (decode_ == true) { Status rc = 
Decode(*tensor, tensor); if (rc.IsError()) { - RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path); + RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode " + path + + ": the image is damaged or permission denied."); } } return Status::OK(); @@ -280,7 +284,7 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) { } CHECK_FAIL_RETURN_UNEXPECTED( item.second.size() == 6, - "Invalid parameter, annotation only support 6 parameters, but got " + std::to_string(item.second.size())); + "[Internal ERROR] annotation only support 6 parameters, but got " + std::to_string(item.second.size())); std::vector tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]}; bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end()); @@ -328,8 +332,8 @@ Status VOCOp::GetClassIndexing(std::vector dir_itr = Path::DirIterator::OpenDirectory(&dir); RETURN_UNEXPECTED_IF_NULL(dir_itr); @@ -101,8 +101,9 @@ Status YesNoOp::Split(const std::string &line, std::vector *split_num) split_num->emplace_back(stoi(split[i])); } } catch (const std::exception &e) { - MS_LOG(ERROR) << "Converting char to int confront with an error in function stoi()."; - RETURN_STATUS_UNEXPECTED("Converting char to int confront with an error in function stoi()."); + MS_LOG(ERROR) << "[Internal ERROR] Converting char to int confront with an error in function stoi: " << e.what(); + RETURN_STATUS_UNEXPECTED("[Internal ERROR] Converting char to int confront with an error in function stoi: " + + std::string(e.what())); } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc index b29c980eb32..455745e55cb 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/take_op.cc @@ -38,7 +38,7 @@ void TakeOp::Print(std::ostream &out, bool show_all) const { } } -Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); } +Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] TakeOp is an inlined operator."); } Status TakeOp::GetNextRow(TensorRow *row) { RETURN_UNEXPECTED_IF_NULL(row); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc index 6a12db071ce..39b6fa753b5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc @@ -101,9 +101,7 @@ Status ZipOp::ComputeColMap() { int32_t old_id = pair.second; // check if name already exists in column name descriptor if (column_name_id_map_.count(name) == 1) { - RETURN_STATUS_UNEXPECTED("Invalid parameter, key: " + name + - " already exists when zipping datasets. Check for duplicate key names in different " - "dataset."); + RETURN_STATUS_UNEXPECTED("Invalid data, duplicate column " + name + " already exists when zipping datasets."); } column_name_id_map_[name] = old_id + colsCurrent; } @@ -115,7 +113,7 @@ Status ZipOp::ComputeColMap() { return Status::OK(); } -Status ZipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. 
SkipOp is an inlined operator."); } +Status ZipOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ZipOp is an inlined operator."); } Status ZipOp::GetNextRow(TensorRow *row) { RETURN_UNEXPECTED_IF_NULL(row); diff --git a/tests/ut/python/dataset/test_datasets_cifarop.py b/tests/ut/python/dataset/test_datasets_cifarop.py index b478b435f79..4dfbd1ea8f3 100644 --- a/tests/ut/python/dataset/test_datasets_cifarop.py +++ b/tests/ut/python/dataset/test_datasets_cifarop.py @@ -210,7 +210,7 @@ def test_cifar10_exception(): with pytest.raises(ValueError, match=error_msg_6): ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=256) - error_msg_7 = "no .bin files found" + error_msg_7 = r"cifar\(.bin\) files are missing" with pytest.raises(RuntimeError, match=error_msg_7): ds1 = ds.Cifar10Dataset(NO_BIN_DIR) for _ in ds1.__iter__(): @@ -360,7 +360,7 @@ def test_cifar100_exception(): with pytest.raises(ValueError, match=error_msg_6): ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=256) - error_msg_7 = "no .bin files found" + error_msg_7 = r"cifar\(.bin\) files are missing" with pytest.raises(RuntimeError, match=error_msg_7): ds1 = ds.Cifar100Dataset(NO_BIN_DIR) for _ in ds1.__iter__(): diff --git a/tests/ut/python/dataset/test_datasets_coco.py b/tests/ut/python/dataset/test_datasets_coco.py index 033a2732333..2d814ec7aa5 100644 --- a/tests/ut/python/dataset/test_datasets_coco.py +++ b/tests/ut/python/dataset/test_datasets_coco.py @@ -300,7 +300,7 @@ def test_coco_case_exception(): pass assert False except RuntimeError as e: - assert "required node not found in JSON" in str(e) + assert "the attribute of 'images' is missing" in str(e) try: data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_CATEGORY_ID_FILE, task="Detection") @@ -308,7 +308,7 @@ def test_coco_case_exception(): pass assert False except RuntimeError as e: - assert "category_id can't find in categories" in str(e) + assert "the attribute of 'category_id': 7 is missing" in str(e) try: data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_FILE, task="Detection") @@ -316,7 +316,7 @@ def test_coco_case_exception(): pass assert False except RuntimeError as e: - assert "failed to open JSON file" in str(e) + assert "Invalid annotation file, Coco Dataset annotation file:" in str(e) try: sampler = ds.PKSampler(3) diff --git a/tests/ut/python/dataset/test_datasets_csv.py b/tests/ut/python/dataset/test_datasets_csv.py index 5cadb21c969..432d9d5469e 100644 --- a/tests/ut/python/dataset/test_datasets_csv.py +++ b/tests/ut/python/dataset/test_datasets_csv.py @@ -239,7 +239,7 @@ def test_csv_dataset_exception(): with pytest.raises(Exception) as err: for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): pass - assert "failed to parse file" in str(err.value) + assert "failed to parse" in str(err.value) TEST_FILE1 = '../data/dataset/testCSV/quoted.csv' def exception_func(item): diff --git a/tests/ut/python/dataset/test_datasets_emnist.py b/tests/ut/python/dataset/test_datasets_emnist.py index 99d3ee29cdc..76703151f2c 100644 --- a/tests/ut/python/dataset/test_datasets_emnist.py +++ b/tests/ut/python/dataset/test_datasets_emnist.py @@ -359,7 +359,6 @@ def test_emnist_exception(): with pytest.raises(RuntimeError, match=error_msg_8): data = ds.EMnistDataset(DATA_DIR, "mnist", "train") data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1) - data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1) for _ in data.__iter__(): 
pass with pytest.raises(RuntimeError, match=error_msg_8): diff --git a/tests/ut/python/dataset/test_datasets_generator.py b/tests/ut/python/dataset/test_datasets_generator.py index 60f0a8adc35..0c9ec382eef 100644 --- a/tests/ut/python/dataset/test_datasets_generator.py +++ b/tests/ut/python/dataset/test_datasets_generator.py @@ -638,7 +638,7 @@ def test_generator_error_2(): for _ in data1: pass print("========", str(info.value)) - assert "Generator should return a tuple of NumPy arrays" in str(info.value) + assert "'GeneratorDataset' should return a tuple of NumPy arrays" in str(info.value) def test_generator_error_3(): @@ -663,7 +663,8 @@ def test_generator_error_4(): for _ in data1: pass - assert "Unexpected error. Result of a tensorOp doesn't match output column names" in str(info.value) + assert "the number of columns returned in 'map' operations should match the number of 'output_columns'"\ + in str(info.value) def test_generator_sequential_sampler(): diff --git a/tests/ut/python/dataset/test_datasets_manifestop.py b/tests/ut/python/dataset/test_datasets_manifestop.py index 2d3667b41ee..34315617ba3 100644 --- a/tests/ut/python/dataset/test_datasets_manifestop.py +++ b/tests/ut/python/dataset/test_datasets_manifestop.py @@ -167,7 +167,7 @@ def test_manifest_dataset_exception(): pass assert False except RuntimeError as e: - assert "Invalid data, 'source' is not found in Manifest file" in str(e) + assert "Invalid manifest file, 'source' is missing in" in str(e) NO_USAGE_DATA_FILE = "../data/dataset/testManifestData/invalidNoUsage.manifest" try: @@ -176,7 +176,7 @@ def test_manifest_dataset_exception(): pass assert False except RuntimeError as e: - assert "Invalid data, 'usage' is not found in Manifest file" in str(e) + assert "Invalid manifest file, 'usage' is missing in" in str(e) if __name__ == '__main__': diff --git a/tests/ut/python/dataset/test_datasets_tfrecord.py b/tests/ut/python/dataset/test_datasets_tfrecord.py index f44ca8af6e8..9f240eeeb2c 100644 --- a/tests/ut/python/dataset/test_datasets_tfrecord.py +++ b/tests/ut/python/dataset/test_datasets_tfrecord.py @@ -307,7 +307,7 @@ def test_tf_wrong_schema(): pass except RuntimeError as e: exception_occurred = True - assert "shape in schema's column 'image' is incorrect" in str(e) + assert "Data dimensions of 'image' do not match" in str(e) assert exception_occurred, "test_tf_wrong_schema failed." 
@@ -318,7 +318,7 @@ def test_tfrecord_invalid_columns(): data = ds.TFRecordDataset(FILES, columns_list=invalid_columns_list) with pytest.raises(RuntimeError) as info: _ = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__() - assert "Invalid data, failed to find column name: not_exist" in str(info.value) + assert "Invalid columns_list, tfrecord file failed to find column name: not_exist" in str(info.value) def test_tfrecord_exception(): diff --git a/tests/ut/python/dataset/test_datasets_usps.py b/tests/ut/python/dataset/test_datasets_usps.py index 3ce96f5b610..4ed5e88070e 100644 --- a/tests/ut/python/dataset/test_datasets_usps.py +++ b/tests/ut/python/dataset/test_datasets_usps.py @@ -214,12 +214,12 @@ def test_usps_exception(): for _ in test_data.__iter__(): pass - error_msg_9 = "failed to find USPS train data file" + error_msg_9 = "usps does not exist or is a directory" with pytest.raises(RuntimeError, match=error_msg_9): train_data = ds.USPSDataset(WRONG_DIR, "train") for _ in train_data.__iter__(): pass - error_msg_10 = "failed to find USPS test data file" + error_msg_10 = "usps.t does not exist or is a directory" with pytest.raises(RuntimeError, match=error_msg_10): test_data = ds.USPSDataset(WRONG_DIR, "test") for _ in test_data.__iter__(): diff --git a/tests/ut/python/dataset/test_datasets_voc.py b/tests/ut/python/dataset/test_datasets_voc.py index c73dbd15ee4..c372e498a70 100644 --- a/tests/ut/python/dataset/test_datasets_voc.py +++ b/tests/ut/python/dataset/test_datasets_voc.py @@ -240,7 +240,7 @@ def test_voc_exception(): pass assert False except RuntimeError as e: - assert "Invalid bndbox: {321, 121, 421, 120}" in str(e) + assert "should be greater than 0, but got {321, 121, 421, 120}" in str(e) def exception_func(item): raise Exception("Error occur!") diff --git a/tests/ut/python/dataset/test_map_offload.py b/tests/ut/python/dataset/test_map_offload.py index 8f71969c93e..d0da8e5d1c7 100644 --- a/tests/ut/python/dataset/test_map_offload.py +++ b/tests/ut/python/dataset/test_map_offload.py @@ -68,6 +68,9 @@ def test_auto_offload(): dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(img_0, img_1) + # Need to turn off here or subsequent test cases will fail. 
+ ds.config.set_auto_offload(False) + def test_offload_concat_dataset_1(): """ diff --git a/tests/ut/python/dataset/test_var_batch_map.py b/tests/ut/python/dataset/test_var_batch_map.py index 854aafb688d..f6033c11d7b 100644 --- a/tests/ut/python/dataset/test_var_batch_map.py +++ b/tests/ut/python/dataset/test_var_batch_map.py @@ -369,9 +369,12 @@ def test_multi_col_map(): # test exceptions assert "output_columns with value 233 is not of type" in batch_map_config(2, 2, split_col, ["col2"], 233) assert "column_order with value 233 is not of type" in batch_map_config(2, 2, split_col, ["col2"], ["col1"], 233) - assert "output_columns in batch is not set correctly" in batch_map_config(2, 2, split_col, ["col2"], ["col1"]) - assert "Incorrect number of columns" in batch_map_config(2, 2, split_col, ["col2"], ["col3", "col4", "col5"]) - assert "col-1 doesn't exist" in batch_map_config(2, 2, split_col, ["col-1"], ["col_x", "col_y"]) + assert "columns that are not involved in 'per_batch_map' should not be in output_columns"\ + in batch_map_config(2, 2, split_col, ["col2"], ["col1"]) + assert "the number of columns returned in 'per_batch_map' function should be 3"\ + in batch_map_config(2, 2, split_col, ["col2"], ["col3", "col4", "col5"]) + assert "'col-1' of 'input_columns' doesn't exist"\ + in batch_map_config(2, 2, split_col, ["col-1"], ["col_x", "col_y"]) def test_exceptions_2(): @@ -379,16 +382,16 @@ def test_exceptions_2(): for i in range(num): yield (np.array([i]),) - def simple_copy(colList, batchInfo): - return ([np.copy(arr) for arr in colList],) + def simple_copy(col_list, batch_info): + return ([np.copy(arr) for arr in col_list],) - def concat_copy(colList, batchInfo): + def concat_copy(col_list, batch_info): # this will duplicate the number of rows returned, which would be wrong! - return ([np.copy(arr) for arr in colList] * 2,) + return ([np.copy(arr) for arr in col_list] * 2,) - def shrink_copy(colList, batchInfo): + def shrink_copy(col_list, batch_info): # this will duplicate the number of rows returned, which would be wrong! - return ([np.copy(arr) for arr in colList][0:int(len(colList) / 2)],) + return ([np.copy(arr) for arr in col_list][0:int(len(col_list) / 2)],) def test_exceptions_config(gen_num, batch_size, in_cols, per_batch_map): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).batch(batch_size, input_columns=in_cols, @@ -401,9 +404,9 @@ def test_exceptions_2(): return str(e) # test exception where column name is incorrect - assert "col:num1 doesn't exist" in test_exceptions_config(4, 2, ["num1"], simple_copy) - assert "expects: 2 rows returned from per_batch_map, got: 4" in test_exceptions_config(4, 2, ["num"], concat_copy) - assert "expects: 4 rows returned from per_batch_map, got: 2" in test_exceptions_config(4, 4, ["num"], shrink_copy) + assert "'num1' of 'input_columns' doesn't exist" in test_exceptions_config(4, 2, ["num1"], simple_copy) + assert "expects: 2 rows returned from 'per_batch_map', got: 4" in test_exceptions_config(4, 2, ["num"], concat_copy) + assert "expects: 4 rows returned from 'per_batch_map', got: 2" in test_exceptions_config(4, 4, ["num"], shrink_copy) if __name__ == '__main__':