optimize datasetops error msg

This commit is contained in:
xiefangqi 2021-11-23 17:34:46 +08:00
parent 2384f31ad0
commit f74b5cea0c
75 changed files with 796 additions and 557 deletions

View File

@ -90,7 +90,7 @@ Status BarrierOp::blockCond() {
{
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
// we have condition name, however the flexibility is in python today
try {
@ -99,7 +99,8 @@ Status BarrierOp::blockCond() {
// Process the return value
if (!py::isinstance<py::bool_>(ret_py_obj)) {
return Status(StatusCode::kMDPyFuncException,
"Invalid parameter, condition wait function should return true/false.");
"Invalid parameter, condition wait function should return boolean, but got " +
std::string(ret_py_obj.get_type().str()));
}
} catch (const py::error_already_set &e) {
return Status(StatusCode::kMDPyFuncException, e.what());

View File

@ -134,7 +134,7 @@ Status BatchOp::operator()() {
if ((num_workers_ > 1 || batch_map_func_) && GetMemoryUsage() > MAX_MEMORY_USAGE_THRESHOLD) {
MS_LOG(WARNING) << "Memory consumption is more than " << (GetMemoryUsage() * 100) << "%, "
<< "which may cause oom error. Please reduce num_parallel_workers size / "
<< "optimize per_batch_map function / other python data preprocess function to "
<< "optimize 'per_batch_map' function / other python data preprocess function to "
<< "reduce memory usage.";
}
#endif
@ -203,8 +203,9 @@ Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, TensorRow *d
first_shape.Print(shape1);
old_tensor->shape().Print(shape2);
RETURN_STATUS_UNEXPECTED(
"Invalid data, batch operation expect same shape for each data row, but got inconsistent shape in column " +
std::to_string(i) + " expected shape for this column is:" + shape1.str() + ", got shape:" + shape2.str());
"Inconsistent batch shapes, batch operation expects the same shape for each data row, "
"but got inconsistent shape in column " +
std::to_string(i) + ", expected shape for this column is:" + shape1.str() + ", got shape:" + shape2.str());
}
}
} else { // handle string column differently
@ -300,7 +301,7 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
CHECK_FAIL_RETURN_UNEXPECTED(num_rows == out_cols[i].size(),
"Invalid data, column: " + out_col_names_[i] +
" expects: " + std::to_string(num_rows) +
" rows returned from per_batch_map, got: " + std::to_string(out_cols[i].size()));
" rows returned from 'per_batch_map', got: " + std::to_string(out_cols[i].size()));
for (auto &t_row : *out_q_table) {
t_row[col_id] = out_cols[i][row_id++];
}
@ -339,14 +340,16 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
*batch_size = size.cast<int32_t>();
if (*batch_size <= 0) {
return Status(StatusCode::kMDPyFuncException,
"Invalid parameter, batch_size function should return an integer greater than 0, but got: " +
"Invalid batch_size function, 'batch_size' function should return an integer greater than 0, "
"but got: " +
std::to_string(*batch_size));
}
} catch (const py::error_already_set &e) {
return Status(StatusCode::kMDPyFuncException, e.what());
} catch (const py::cast_error &e) {
return Status(StatusCode::kMDPyFuncException,
"Invalid parameter, batch_size function should return an integer greater than 0.");
return Status(
StatusCode::kMDPyFuncException,
"Invalid batch_size function, the return value of batch_size function cast failed: " + std::string(e.what()));
}
}
return Status(StatusCode::kSuccess, "batch_size function call succeeded.");
@ -379,11 +382,13 @@ Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBat
// Parse batch map return value
py::tuple ret_tuple = py::cast<py::tuple>(ret_py_obj);
CHECK_FAIL_RETURN_UNEXPECTED(py::isinstance<py::tuple>(ret_tuple),
"per_batch_map function should return a tuple.");
"Invalid per_batch_map, 'per_batch_map' function should return a tuple, but got " +
std::string(ret_py_obj.get_type().str()));
CHECK_FAIL_RETURN_UNEXPECTED(ret_tuple.size() == out_col_names_.size(),
"Incorrect number of columns returned in per_batch_map function. Expects: " +
"Invalid per_batch_map, the number of columns returned in 'per_batch_map' function "
"should be " +
std::to_string(out_col_names_.size()) +
" got: " + std::to_string(ret_tuple.size()));
", but got: " + std::to_string(ret_tuple.size()));
for (size_t i = 0; i < ret_tuple.size(); i++) {
TensorRow output_batch;
// If user returns a type that is neither a list nor an array, issue a error msg.
@ -405,7 +410,8 @@ Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBat
return Status(StatusCode::kMDPyFuncException, e.what());
} catch (const py::cast_error &e) {
return Status(StatusCode::kMDPyFuncException,
"Invalid parameter, per_batch_map function of batch should return a tuple of list of numpy array.");
"Invalid per_batch_map, the return value of 'per_batch_map' function cast to py::tuple failed: " +
std::string(e.what()));
}
}
return Status::OK();
@ -432,7 +438,7 @@ Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo &
if (pad_shapes[col_id].empty()) pad_shapes[col_id] = max_shapes[col_id]; // fill pad shape with -1
CHECK_FAIL_RETURN_UNEXPECTED(
pad_shapes[col_id].size() == max_shapes[col_id].size(),
"Invalid data, rank of pad_shape must be equal to rank of specified column. pad_shapes rank:" +
"Invalid pad_info, rank of pad_shape must be equal to rank of specified column. pad_shapes rank:" +
std::to_string(pad_shapes[col_id].size()) + ", column rank: " + std::to_string(max_shapes[col_id].size()));
}
@ -482,12 +488,14 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info,
for (const auto &p : pad_info) {
auto location = column_name_id_map.find(p.first);
CHECK_FAIL_RETURN_UNEXPECTED(location != column_name_id_map.end(),
"Invalid parameter, column name: " + p.first + " does not exist.");
"Invalid pad_info, column name: " + p.first + " does not exist.");
auto col_id = static_cast<dsize_t>(location->second);
CHECK_FAIL_RETURN_UNEXPECTED(
col_id < pad_vals->size() && col_id < pad_shapes->size(),
"Invalid parameter, column id must be less than the size of pad_val and pad_shape, but got: " +
std::to_string(col_id));
"Invalid pad_info, column name should match the size of pad value and pad shape, but got "
"column name: " +
p.first + ", the size of pad value: " + std::to_string(pad_vals->size()) +
" and the size of pad shape: " + std::to_string(pad_shapes->size()) + ".");
pad_cols->insert(col_id);
(*pad_vals)[col_id] = p.second.second; // set pad values
(*pad_shapes)[col_id] = p.second.first.AsVector(); // empty vector if shape is unknown
@ -498,8 +506,9 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info,
Status BatchOp::ComputeColMap() {
CHECK_FAIL_RETURN_UNEXPECTED(child_.size() == 1,
"Invalid data, batch operator can't be used as a single operator, "
"should be preceded by an operator that reads data, for example, ImageFolderDataset.");
"Invalid batch, batch operator can't be used as a single operator, "
"should be preceded by an operator that reads data, for example, "
"ds1 = ds.ImageFolderDataset().batch().");
CHECK_FAIL_RETURN_UNEXPECTED(!(child_[0]->column_name_id_map().empty()),
"Invalid data, the column of the previous operator of the batch cannot be empty.");
@ -514,7 +523,7 @@ Status BatchOp::ComputeColMap() {
// check all input columns exist
for (const auto &col : in_col_names_) {
CHECK_FAIL_RETURN_UNEXPECTED(child_map_.find(col) != child_map_.end(),
"Invalid parameter, col:" + col + " doesn't exist in dataset.");
"Invalid input_columns, '" + col + "' of 'input_columns' doesn't exist.");
}
// following logic deals with per_batch_map
@ -551,8 +560,21 @@ Status BatchOp::ComputeColMap() {
}
}
CHECK_FAIL_RETURN_UNEXPECTED(column_name_id_map_.size() == (child_map_no_in_col.size() + out_col_names_.size()),
"Key error in column_name_id_map_. output_columns in batch is not set correctly!");
if (column_name_id_map_.size() != (child_map_no_in_col.size() + out_col_names_.size())) {
const std::string prefix_str = std::string("[");
auto column_no_in_col = std::accumulate(
child_map_no_in_col.begin(), child_map_no_in_col.end(), prefix_str,
[](const std::string &str, const std::pair<std::string, int32_t> &p) { return str + p.first + ","; });
column_no_in_col += "]";
auto column_out =
std::accumulate(out_col_names_.begin(), out_col_names_.end(), prefix_str,
[](const std::string &str, const std::string &out_col) { return str + out_col + ","; });
column_out += "]";
RETURN_STATUS_UNEXPECTED(
"Invalid output_columns, columns that are not involved in 'per_batch_map' should not be "
"in output_columns, but got columns that are not in input_columns: " +
column_no_in_col + ", output_columns: " + column_out + ".");
}
return Status::OK();
}

View File

@ -108,7 +108,7 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T
for (size_t i = 0; i < number_of_arguments; i++) {
auto map_item = column_name_id_map_.find(length_dependent_columns_[i]);
if (map_item == column_name_id_map_.end()) {
RETURN_STATUS_UNEXPECTED("BucketBatchByLength: Couldn't find the specified column(" +
RETURN_STATUS_UNEXPECTED("Invalid column, BucketBatchByLength couldn't find the specified column(" +
length_dependent_columns_[i] + ") in the dataset.");
}
int32_t column_index = map_item->second;
@ -118,7 +118,8 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T
RETURN_IF_NOT_OK(output.at(0)->GetItemAt(out_element_length, {0}));
if (*out_element_length < 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid parameter, element_length_function must return an integer greater than or equal to 0, but got" +
"Invalid element_length_function, element_length_function must return an integer greater than or equal to 0, "
"but got " +
std::to_string(*out_element_length));
}
} else {
@ -139,7 +140,8 @@ Status BucketBatchByLengthOp::PadAndBatchBucket(int32_t bucket_index, int32_t ba
if (pad_shape[i] == TensorShape::kDimUnknown) {
if (bucket_index + 1 >= bucket_boundaries_.size()) {
std::string error_message =
"Invalid data, requested to pad to bucket boundary, element falls in last bucket.";
"Invalid data, requested to pad to bucket boundary failed, bucket index should be less than " +
std::to_string(bucket_boundaries_.size()) + ", but got " + std::to_string(bucket_index);
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, error_message);
}

View File

@ -41,7 +41,8 @@ BuildSentencePieceVocabOp::BuildSentencePieceVocabOp(std::shared_ptr<SentencePie
Status BuildSentencePieceVocabOp::operator()() {
if (tree_ == nullptr) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set.");
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"[Internal ERROR] Pipeline init failed, Execution tree not set.");
}
RETURN_IF_NOT_OK(sentence_queue_->Register(tree_->AllTasks()));
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask(
@ -58,7 +59,7 @@ Status BuildSentencePieceVocabOp::operator()() {
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
}
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no operator should be after from_dataset (repeat detected)");
CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "'build_sentencepiece_vocab' does not support 'repeat'.");
eoe_warning = true;
}
// add empty tensorRow for quit
@ -71,13 +72,13 @@ Status BuildSentencePieceVocabOp::SentenceThread() {
TaskManager::FindMe()->Post();
if (col_names_.empty() == true) {
auto itr = column_name_id_map_.find("text");
CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(),
"Invalid data, 'text' column does not exist in dataset.");
CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid data, 'text' column does not exist.");
col_id_ = itr->second;
} else {
auto itr = column_name_id_map_.find(col_names_[0]);
CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(),
"Invalid parameter, column name: " + col_names_[0] + " does not exist in dataset.");
CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid column, column name: " + col_names_[0] +
" does not exist, check existing "
"column with dataset API 'get_col_names'");
col_id_ = itr->second;
}
std::unique_ptr<DatasetSentenceIterator> sentence_iter = std::make_unique<DatasetSentenceIterator>(this);
@ -89,7 +90,7 @@ Status BuildSentencePieceVocabOp::SentenceThread() {
} else {
if (vocab_ == nullptr) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"Invalid parameter, SentencePiece vocab not set.");
"[Internal ERROR] SentencePiece vocab should not be null.");
}
vocab_->set_model_proto(model_proto);
}
@ -131,7 +132,7 @@ bool BuildSentencePieceVocabOp::Done() { return read_done_; }
void BuildSentencePieceVocabOp::Next(std::string *sentence) {
if (sentence == nullptr) {
MS_LOG(ERROR) << "BuildSentencePieceVocab get nullptr element, please check data.";
MS_LOG(ERROR) << "[Internal ERROR] BuildSentencePieceVocab get nullptr element, please check data.";
return;
}
TensorRow new_row;
@ -151,8 +152,8 @@ void BuildSentencePieceVocabOp::Next(std::string *sentence) {
if (new_row[col_id_]->type().IsNumeric() || new_row[col_id_]->Rank() > 1) {
ret_status_ =
Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"Invalid data, build_sentence_piece_vocab only works on string data with rank equal to 1, got type: " +
new_row[col_id_]->type().ToString() + "and rank: " + std::to_string(new_row[col_id_]->Rank()));
"Invalid data, build_sentence_piece_vocab only supports string data with rank equal to 1, but got type: " +
new_row[col_id_]->type().ToString() + ", rank: " + std::to_string(new_row[col_id_]->Rank()));
read_done_ = true;
return;
}

View File

@ -69,7 +69,9 @@ class BuildSentencePieceVocabOp : public PipelineOp {
Status operator()() override;
Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildSentencePieceVocabOp"); }
Status Reset() override {
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Reset shouldn't be called in BuildSentencePieceVocabOp.");
}
std::string Name() const override { return kBuildSentencePieceVocabOp; }

View File

@ -54,7 +54,8 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) {
while (!new_row.empty()) {
for (int32_t col : col_ids_) {
CHECK_FAIL_RETURN_UNEXPECTED(!new_row[col]->type().IsNumeric(),
"Invalid data, build_vocab only works on string data, but got numeric data type: " +
"Invalid datatype, 'build_vocab' only supports string type of input, but got "
"numeric type: " +
new_row[col]->type().ToString());
for (auto itr = new_row[col]->begin<std::string_view>(); itr != new_row[col]->end<std::string_view>(); ++itr) {
(*wrkr_map)[std::string(*itr)] += 1;
@ -79,7 +80,8 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) {
Status BuildVocabOp::operator()() {
// launch the collector thread
if (tree_ == nullptr) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set.");
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"[Internal ERROR] Pipeline init failed, Execution tree not set.");
}
RETURN_IF_NOT_OK(distributor_queue_->Register(tree_->AllTasks()));
RETURN_IF_NOT_OK(collector_queue_->Register(tree_->AllTasks()));
@ -96,8 +98,9 @@ Status BuildVocabOp::operator()() {
col_ids_.reserve(col_names_.size());
for (std::string col : col_names_) {
auto itr = column_name_id_map_.find(col);
CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(),
"Invalid parameter, column name: " + col + " does not exist in dataset.");
CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid column name, column name: " + col +
" does not exist, check existing columns "
"with dataset API 'get_col_names'");
col_ids_.push_back(itr->second);
}
} else {
@ -113,7 +116,8 @@ Status BuildVocabOp::operator()() {
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
}
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no operator should be after from_dataset (repeat detected)");
CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning,
"Invalid repeat operator, BuildVocab does not support 'repeat' operator.");
eoe_warning = true;
}
@ -137,7 +141,8 @@ Status BuildVocabOp::CollectorThread() {
++num_quited_worker;
}
} // all frequencies are obtained
CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), "Invalid data, there are no words in the dataset.");
CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(),
"Invalid data, BuildVocab load data failed: no words found in vocab, check vocab.");
std::vector<std::string> words;
// make sure enough is reserved, this will become a partially sorted list eventually
words.reserve(wrkr_map->size());
@ -158,7 +163,7 @@ Status BuildVocabOp::CollectorThread() {
}
CHECK_FAIL_RETURN_UNEXPECTED(err_msg.empty(),
"Invalid data, these special words are already in the dataset: " + err_msg + ".");
"Invalid special words, these special words are already in the vocab: " + err_msg + ".");
int64_t num_words = std::min(static_cast<int64_t>(words.size()), top_k_);
if (num_words == 0) {

View File

@ -66,7 +66,7 @@ class BuildVocabOp : public ParallelOp<TensorRow, TensorRow> {
Status operator()() override;
Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildVocabOp"); }
Status Reset() override { RETURN_STATUS_UNEXPECTED("[Internal ERROR] Reset shouldn't be called in BuildVocabOp"); }
private:
const int32_t interval_;

View File

@ -191,7 +191,7 @@ Status CacheBase::FetchFromCache(int32_t worker_id) {
if (AllowCacheMiss()) {
++num_cache_miss_;
} else {
std::string errMsg = "Row id " + std::to_string(row_id) + " not found.";
std::string errMsg = "[Internal ERROR] Row id " + std::to_string(row_id) + " not found.";
RETURN_STATUS_UNEXPECTED(errMsg);
}
}
@ -225,7 +225,8 @@ Status CacheBase::UpdateColumnMapFromCache() {
Status CacheBase::GetPrefetchRow(row_id_type row_id, TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
CHECK_FAIL_RETURN_UNEXPECTED(row_id >= 0, "Expect positive row id, but got:" + std::to_string(row_id));
CHECK_FAIL_RETURN_UNEXPECTED(row_id >= 0,
"[Internal ERROR] Expect positive row id, but got:" + std::to_string(row_id));
RETURN_IF_NOT_OK(prefetch_.PopFront(row_id, out));
return Status::OK();
}
@ -278,7 +279,7 @@ Status CacheBase::Prefetcher(int32_t worker_id) {
cache_miss.clear();
std::unique_ptr<IOBlock> blk;
RETURN_IF_NOT_OK(prefetch_queues_[worker_id]->PopFront(&blk));
CHECK_FAIL_RETURN_UNEXPECTED(!blk->eof(), "Expect eoe or a regular io block.");
CHECK_FAIL_RETURN_UNEXPECTED(!blk->eof(), "[Internal ERROR] Expect eoe or a regular io block.");
if (!blk->eoe()) {
RETURN_IF_NOT_OK(blk->GetKeys(&prefetch_keys));
Status rc;

View File

@ -29,7 +29,7 @@ namespace dataset {
Status CacheLookupOp::operator()() {
if (!sampler_) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"Invalid parameter, CacheLookupOp requires a sampler before it can be executed, but got nullptr.");
"Invalid sampler, Cache requires a sampler before it can be executed, but got nullptr.");
}
RETURN_IF_NOT_OK(RegisterResources());

View File

@ -147,7 +147,8 @@ Status CacheMergeOp::CacheMissWorkerEntry(int32_t workerId) {
} else {
row_id_type row_id = new_row.getId();
if (row_id < 0) {
std::string errMsg = "Expect positive row id, but got: " + std::to_string(row_id);
std::string errMsg =
"[Internal ERROR] row id should be greater than or equal to 0, but got: " + std::to_string(row_id);
RETURN_STATUS_UNEXPECTED(errMsg);
}
if (cache_missing_rows_) {
@ -213,7 +214,8 @@ Status CacheMergeOp::PrepareOperator() { // Run any common code from super clas
// specific logic
CHECK_FAIL_RETURN_UNEXPECTED(
child_.size() == kNumChildren,
"Incorrect number of children of CacheMergeOp, required num is 2, but got:" + std::to_string(child_.size()));
"[Internal ERROR] Incorrect number of children of CacheMergeOp, required num is 2, but got:" +
std::to_string(child_.size()));
RETURN_IF_NOT_OK(DatasetOp::PrepareOperator());
// Get the computed check sum from all ops in the cache miss class
uint32_t cache_crc = DatasetOp::GenerateCRC(child_[kCacheMissChildIdx]);
@ -231,7 +233,7 @@ Status CacheMergeOp::PrepareOperator() { // Run any common code from super clas
}
Status CacheMergeOp::ComputeColMap() {
CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "Invalid data, cache miss stream is empty.");
CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "[Internal ERROR] cache miss stream is empty.");
if (column_name_id_map().empty()) {
column_name_id_map_ = child_[kCacheMissChildIdx]->column_name_id_map();
}
@ -270,7 +272,7 @@ Status CacheMergeOp::GetRq(row_id_type row_id, CacheMergeOp::TensorRowCacheReque
RETURN_IF_NOT_OK(mem.allocate(1));
*out = mem.GetMutablePointer();
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, map insert fail.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] map insert fail.");
}
}
return Status::OK();

View File

@ -43,7 +43,7 @@ Status CacheOp::operator()() {
RETURN_UNEXPECTED_IF_NULL(tree_);
if (!sampler_) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"Invalid parameter, CacheOp requires a sampler before it can be executed, but got nullptr.");
"Invalid sampler, CacheOp requires a sampler before it can be executed, but got nullptr.");
}
RETURN_IF_NOT_OK(RegisterResources());
@ -145,9 +145,9 @@ Status CacheOp::WaitForCachingAllRows() {
BuildPhaseDone = true;
break;
case CacheServiceState::kOutOfMemory:
return Status(StatusCode::kMDOutOfMemory, "Cache server is running out of memory");
return Status(StatusCode::kMDOutOfMemory, "Cache server is running out of memory, check memory usage.");
case CacheServiceState::kNoSpace:
return Status(StatusCode::kMDNoSpace, "Cache server is running of out spill storage");
return Status(StatusCode::kMDNoSpace, "Cache server is running out of spill storage, check memory usage.");
case CacheServiceState::kNone:
case CacheServiceState::kError:
default:

View File

@ -74,9 +74,17 @@ Status ConcatOp::Verify(int32_t id, const TensorRow &new_row) {
// Compare the data type and data rank with these in child[0]
int32_t index = 0;
for (auto item : new_row) {
if ((item->type() != data_type_[index]) || item->Rank() != data_rank_[index++]) {
RETURN_STATUS_UNEXPECTED("Invalid data, data type or data rank is not the same with previous dataset.");
if (item->type() != data_type_[index]) {
RETURN_STATUS_UNEXPECTED(
"Invalid datatype, the data type of two concatenated datasets should be the same, but got " +
item->type().ToString() + " and " + data_type_[index].ToString() + ".");
}
if (item->Rank() != data_rank_[index]) {
RETURN_STATUS_UNEXPECTED(
"Invalid datatype, the data rank of two concatenated datasets should be the same, but got " +
std::to_string(item->Rank()) + " and " + std::to_string(data_rank_[index]) + ".");
}
index++;
}
}
verified_ = true;
@ -89,12 +97,13 @@ Status ConcatOp::ComputeColMap() {
// Obtain columns_name_id_map from child_[0]
column_name_id_map_ = child_[0]->column_name_id_map();
if (column_name_id_map_.empty()) {
RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Child column name map cannot be empty!");
}
// Verify all children have the same column name map
for (size_t i = 0; i < child_.size(); ++i) {
if (child_[i]->column_name_id_map() != column_name_id_map_) {
RETURN_STATUS_UNEXPECTED("Invalid data, column name or column order is not the same with previous dataset.");
RETURN_STATUS_UNEXPECTED(
"Invalid columns, 'column name' or 'column order' of concat datasets should be the same.");
}
}
} else {
@ -118,7 +127,7 @@ Status ConcatOp::GetNumClasses(int64_t *num_classes) {
*num_classes = max_num_classes;
return Status::OK();
}
Status ConcatOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
Status ConcatOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ConcatOp is an inlined operator."); }
bool ConcatOp::IgnoreSample() {
bool is_not_mappable_or_second_ne_zero = true;
@ -184,10 +193,10 @@ Status ConcatOp::GetNextRow(TensorRow *row) {
return Status::OK();
}
if (row->eof()) {
CHECK_FAIL_RETURN_UNEXPECTED(cur_child_ == 0, "Received an unexpected EOF.");
CHECK_FAIL_RETURN_UNEXPECTED(cur_child_ == 0, "[Internal ERROR] Received an unexpected EOF.");
for (int32_t i = cur_child_ + 1; i < child_.size(); i++) {
RETURN_IF_NOT_OK(child_[i]->GetNextRow(row));
CHECK_FAIL_RETURN_UNEXPECTED(row->eof(), "Row must be an EOF.");
CHECK_FAIL_RETURN_UNEXPECTED(row->eof(), "[Internal ERROR] Row must be an EOF.");
}
return Status::OK();
}

View File

@ -63,7 +63,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) {
}
if (operator_id_ == kInvalidOperatorId) {
std::string err_msg(
"Cannot add child node. Tree node connections can only "
"[Internal ERROR] Cannot add child node. Tree node connections can only "
"be made if the node belongs to a tree.");
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -71,7 +71,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) {
// disallow relationships with other trees
if (tree_ != child->tree_) {
std::string err_msg(
"Cannot add child node. Tree node connections can only be made if both nodes belong to the same tree.");
"Invalid operator structure, the relationship of operators should be one by one, but got too many branches.");
RETURN_STATUS_UNEXPECTED(err_msg);
}
child_.push_back(child);
@ -82,7 +82,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) {
Status DatasetOp::RemoveChild(std::shared_ptr<DatasetOp> child) {
if (operator_id_ == kInvalidOperatorId) {
std::string err_msg(
"Cannot remove child node. Tree node connections can only "
"[Internal ERROR] Cannot remove child node. Tree node connections can only "
"be made if the node belongs to a tree.");
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -90,7 +90,7 @@ Status DatasetOp::RemoveChild(std::shared_ptr<DatasetOp> child) {
// disallow relationships with other trees
if (tree_ != child->tree_) {
std::string err_msg(
"Cannot remove child node. Tree node connections can only be made if both nodes belong to the same tree.");
"Invalid operator structure, the relationship of operators should be one by one, but got too many branches.");
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -132,11 +132,15 @@ void DatasetOp::RemoveParent(const DatasetOp *parent) {
// Removes this node from the tree and connects it's parent/child together
Status DatasetOp::Remove() {
if (parent_.size() > 1) {
std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
std::string err_msg(
"Invalid operator structure, the relationship between operators should be one-to-one, but encountered more than "
"one parent, namely: " +
std::to_string(parent_.size()));
RETURN_STATUS_UNEXPECTED(err_msg);
}
if (child_.size() > 1) {
std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
std::string err_msg(
"Invalid operator structure, the relationship of operators should be one by one, but got too many branches.");
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -151,7 +155,8 @@ Status DatasetOp::Remove() {
// If we have a parent, then assign child's parent to point to our parent.
if (!parent_.empty()) {
CHECK_FAIL_RETURN_UNEXPECTED(parent_[0]->Children().size() == 1,
"Removing a node whose parent has more than 1 child is not supported.");
"Invalid operator structure, the relationship of operators should be one by one, "
"but got too many branches.");
child_[0]->parent_[0] = parent_[0];
} else {
// We don't have a parent, so we are the root node being removed.
@ -293,7 +298,8 @@ Status DatasetOp::GetClassIndexing(std::vector<std::pair<std::string, std::vecto
return child_[child_.size() - 1]->GetClassIndexing(output_class_indexing);
} else {
*output_class_indexing = {};
RETURN_STATUS_UNEXPECTED("Trying to get class index from leaf node, missing override.");
RETURN_STATUS_UNEXPECTED("Unsupported scenario, GetClassIndexing failed: " + Name() +
" doesn't support GetClassIndexing yet.");
}
}
@ -343,12 +349,14 @@ std::string DatasetOp::ColumnNameMapAsString() const {
// Operations changing the column map must overwrite this function.
Status DatasetOp::ComputeColMap() {
if (child_.size() > 1) {
RETURN_STATUS_UNEXPECTED("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
RETURN_STATUS_UNEXPECTED(
"Invalid operator structure, the relationship of operators should be one by one, but got too many branches.");
}
if (column_name_id_map_.empty()) {
column_name_id_map_ = child_[0]->column_name_id_map();
if (column_name_id_map_.empty()) {
RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
RETURN_STATUS_UNEXPECTED("Invalid column list, the column list of " + child_[0]->Name() +
" should have one column at least, but got empty.");
}
MS_LOG(DEBUG) << "Setting column map:\n" << DatasetOp::ColumnNameMapAsString();
} else {

View File

@ -119,8 +119,8 @@ Status DeviceQueueOp::FilterMetadata(TensorRow *row) {
Status DeviceQueueOp::CheckExceptions(const TensorRow &row) const {
// this method checks if the row meets the conditions to be sent to TDT
for (const auto &item : row) {
CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid data, cannot send string tensor to device.");
CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, cannot send tensor with no data to device.");
CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid datatype, cannot send string data to device.");
CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, the data send to device is null.");
}
return Status::OK();
}
@ -152,7 +152,8 @@ Status DeviceQueueOp::operator()() {
}
}
if (tdtInstancePtr->acl_handle_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Create channel for sending data failed, please check DEVICE ID setting.");
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Create channel for sending data failed, please check DEVICE ID setting.");
}
RETURN_IF_NOT_OK(SendDataToAscend());
#endif
@ -343,7 +344,8 @@ Status DeviceQueueOp::SendRowToTdt(TensorRow curr_row, bool is_profiling_enable,
#ifdef ENABLE_TDTQUE
Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
if (!create_data_info_queue_) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "DataInfo queue is not created.");
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"[Internal ERROR] DataInfo queue is not created.");
}
// This place has a race condition with operator(), so the first one
// arrive here will do the initialize work.
@ -359,7 +361,7 @@ Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
}
#else
Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "GetDataInfo is not supported yet.");
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "'GetDataInfo' only supported on Ascend.");
}
#endif
@ -446,7 +448,7 @@ Status DeviceQueueOp::PushDataToGPU() {
if (!ps::PsDataPrefetch::GetInstance().PrefetchData(channel_name_, items[0].data_ptr_, items[0].data_len_,
items[0].data_type_)) {
return Status(StatusCode::kMDTimeOut, __LINE__, __FILE__,
"Failed to prefetch data in current PS mode(cache data when sending).");
"[Internal ERROR] Failed to prefetch data in current PS mode(cache data when sending).");
}
RETURN_IF_NOT_OK(RetryPushData(handle, items));
#ifndef ENABLE_SECURITY
@ -623,18 +625,19 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items,
for (auto &sub_item : *items) {
auto rc = pool_[worker_id]->Allocate(sub_item.data_len_, &sub_item.data_ptr_);
if (rc.IsError() || sub_item.data_ptr_ == nullptr) {
return Status(StatusCode::kMDOutOfMemory, __LINE__, __FILE__, "Memory malloc failed.");
return Status(StatusCode::kMDOutOfMemory, __LINE__, __FILE__, "Memory malloc failed, check memory usage.");
}
if (curr_row[i] == nullptr) {
MS_LOG(ERROR) << "The pointer curr_row[" << i << "] is null";
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "TensorRow 'curr_row' contains nullptr.");
MS_LOG(ERROR) << "[Internal ERROR] The pointer curr_row[" << i << "] is null";
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"[Internal ERROR] TensorRow 'curr_row' contains nullptr.");
}
sub_item.data_type_ = curr_row[i]->type().ToString();
const unsigned char *column_data = curr_row[i]->GetBuffer();
if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data,
static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) {
MS_LOG(ERROR) << "memcpy_s failed!";
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "memcpy failed when using memcpy_s do copy.");
MS_LOG(ERROR) << "[Internal ERROR] memcpy_s failed.";
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "[Internal ERROR] memcpy_s failed.");
}
}

View File

@ -43,7 +43,7 @@ void EpochCtrlOp::Print(std::ostream &out, bool show_all) const {
Status EpochCtrlOp::GetNextRow(TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);
if (child_.empty()) {
RETURN_STATUS_UNEXPECTED("EpochCtrlOp can't be the leaf node(first operator) of pipeline.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] EpochCtrlOp can't be the leaf node(first operator) of pipeline.");
}
// `retry_if_eoe` is false because EpochCtrlOp does not eat EOE.

View File

@ -143,7 +143,7 @@ Status FilterOp::WorkerCompute(const TensorRow &in_row, bool *out_predicate) {
Status FilterOp::CheckInput(const TensorRow &input) const {
for (auto &item : input) {
if (item == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid data, input tensor is null.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] input tensor is null.");
}
}
return Status::OK();

View File

@ -192,7 +192,7 @@ Status MapOp::WorkerEntry(int32_t worker_id) {
}
RETURN_IF_NOT_OK(worker_out_queues_[worker_id]->EmplaceBack(std::move(in_row)));
} else {
CHECK_FAIL_RETURN_UNEXPECTED(in_row.size() != 0, "MapOp got an empty TensorRow.");
CHECK_FAIL_RETURN_UNEXPECTED(in_row.size() != 0, "[Internal ERROR] MapOp got an empty TensorRow.");
TensorRow out_row;
// Perform the compute function of TensorOp(s) and store the result in new_tensor_table.
RETURN_IF_NOT_OK(WorkerCompute(in_row, &out_row, job_list));
@ -244,7 +244,11 @@ Status MapOp::WorkerCompute(const TensorRow &in_row, TensorRow *out_row,
// Sanity check a row in result_table
if (!result_table.empty() && out_columns_.size() != result_table[0].size()) {
RETURN_STATUS_UNEXPECTED("Result of a tensorOp doesn't match output column names");
RETURN_STATUS_UNEXPECTED(
"Invalid columns, the number of columns returned in 'map' operations should match "
"the number of 'output_columns', but got the number of columns returned in 'map' operations: " +
std::to_string(result_table[0].size()) +
", the number of 'output_columns': " + std::to_string(out_columns_.size()) + ".");
}
// Merging the data processed by job (result_table) with the data that are not used.
@ -299,7 +303,8 @@ Status MapOp::InitPrivateVariable(std::unordered_map<std::string, int32_t> *col_
if (in_columns_.empty()) {
auto itr =
std::find_if(col_name_id_map->begin(), col_name_id_map->end(), [](const auto &it) { return it.second == 0; });
CHECK_FAIL_RETURN_UNEXPECTED(itr != col_name_id_map->end(), "Column name id map doesn't have id 0");
CHECK_FAIL_RETURN_UNEXPECTED(itr != col_name_id_map->end(),
"[Internal ERROR] Column name id map doesn't have id 0");
MS_LOG(INFO) << "Input columns empty for map op, will apply to the first column in the current table.";
in_columns_.push_back(itr->first);

View File

@ -74,7 +74,7 @@ TensorRow ProjectOp::Project(const TensorRow &row) {
// However, the ProjectOp is defined as a inlined operator, so it is invalid to launch the
// functor since this op runs inlined inside another operator. The function is overloaded to
// ensure that it is not called by mistake (it will generate an error).
Status ProjectOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. ProjectOp is an inlined operator."); }
Status ProjectOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ProjectOp is an inlined operator."); }
Status ProjectOp::EoeReceived(int32_t worker_id) {
state_ = OpState::kDeOpIdle;
@ -92,7 +92,7 @@ Status ProjectOp::ComputeColMap() {
for (size_t i = 0; i < columns_to_project_.size(); i++) {
std::string &current_column = columns_to_project_[i];
if (child_column_name_mapping.find(current_column) == child_column_name_mapping.end()) {
std::string err_msg = "Invalid parameter, column name: " + current_column + " does not exist in dataset.";
std::string err_msg = "Invalid column, column name: " + current_column + " does not exist.";
RETURN_STATUS_UNEXPECTED(err_msg);
}
// Setup the new column name mapping for ourself (base class field)

View File

@ -41,7 +41,7 @@ Status RenameOp::GetNextRow(TensorRow *row) {
return Status::OK();
}
Status RenameOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. RenameOp is an inlined operator."); }
Status RenameOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] RenameOp is an inlined operator."); }
// Rename core functionality to compute the new column name id map.
// We need to overwrite the super class ComputeColMap here because we're making a modification of the
@ -71,7 +71,7 @@ Status RenameOp::ComputeColMap() {
MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << ".";
if (new_col_name.find(out_columns_[index]) != new_col_name.end()) {
std::string err_msg(
"Invalid parameter, rename operation does not support rename one column name into another already exist "
"Invalid column, rename operation does not support rename one column name into another already exist "
"column name, existing column name is: " +
out_columns_[index] + ".");
RETURN_STATUS_UNEXPECTED(err_msg);
@ -82,7 +82,7 @@ Status RenameOp::ComputeColMap() {
// not found
if (new_col_name.find(name) != new_col_name.end()) {
std::string err_msg(
"Invalid parameter, rename operation does not support rename one column name into another already exist "
"Invalid column, rename operation does not support rename one column name into another already exist "
"column name, existing column name is: " +
name + ".");
RETURN_STATUS_UNEXPECTED(err_msg);
@ -95,7 +95,7 @@ Status RenameOp::ComputeColMap() {
// only checks number of renamed columns have been found, this input check doesn't check everything
if (found != in_columns_.size()) {
MS_LOG(DEBUG) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << ".";
std::string err_msg = "Invalid parameter, column to be renamed does not exist in dataset.";
std::string err_msg = "Invalid column, column to be renamed does not exist.";
RETURN_STATUS_UNEXPECTED(err_msg);
}

View File

@ -60,7 +60,7 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const {
Status RepeatOp::GetNextRow(TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);
if (child_.empty()) {
RETURN_STATUS_UNEXPECTED("Pipeline init failed, RepeatOp can't be the first op in pipeline.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Pipeline init failed, RepeatOp can't be the first op in pipeline.");
}
RETURN_IF_NOT_OK(child_[0]->GetNextRow(row));
@ -108,7 +108,7 @@ Status RepeatOp::EoeReceived(int32_t worker_id) {
// However, the RepeatOp is defined as a inlined operator, so it is invalid to launch the
// functor since this op runs inlined inside another operator. The function is overloaded to
// ensure that it is not called by mistake (it will generate an error).
Status RepeatOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. RepeatOp is an inlined operator."); }
Status RepeatOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] RepeatOp is an inlined operator."); }
// Base-class override for handling cases when an eof is received.
Status RepeatOp::EofReceived(int32_t worker_id) {

View File

@ -205,7 +205,8 @@ Status ShuffleOp::InitShuffleBuffer() {
// rows.
if (shuffle_buffer_state_ != kShuffleStateInit) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"Invalid shuffle buffer state, shuffle buffer should be init first or reset after each epoch.");
"[Internal ERROR] Invalid shuffle buffer state, shuffle buffer should be init first or reset "
"after each epoch.");
}
// Before we drop into the fetching loop, call the fetch once for the first time
@ -220,7 +221,7 @@ Status ShuffleOp::InitShuffleBuffer() {
}
if (new_row.empty()) {
RETURN_STATUS_UNEXPECTED("Invalid data, unable to fetch a single row for shuffle buffer.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Unable to fetch a single row for shuffle buffer.");
}
// Now fill the rest of the shuffle buffer until we are unable to get the next row or we reached

View File

@ -43,7 +43,7 @@ void SkipOp::Print(std::ostream &out, bool show_all) const {
}
}
Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] SkipOp is an inlined operator."); }
Status SkipOp::GetNextRow(TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);

View File

@ -64,7 +64,7 @@ Status AlbumOp::PrepareData() {
dirname_offset_ = folder_path_.length();
std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
if (!folder.Exists() || dirItr == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_ + ".");
RETURN_STATUS_UNEXPECTED("Invalid folder, " + folder_path_ + " does not exist or permission denied.");
}
MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << ".";
@ -94,7 +94,7 @@ Status AlbumOp::PrepareData() {
// This function does not return status because we want to just skip bad input, not crash
bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
if (valid == nullptr) {
MS_LOG(ERROR) << "Album parameter can't be nullptr.";
MS_LOG(ERROR) << "[Internal ERROR] Album parameter can't be nullptr.";
return false;
}
std::ifstream file_handle;
@ -214,8 +214,8 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n
RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " +
data_schema_->Column(col_num).Type().ToString());
RETURN_STATUS_UNEXPECTED("Invalid column type, column type of " + data_schema_->Column(col_num).Name() +
" should be int32 or int64, but got " + data_schema_->Column(col_num).Type().ToString());
}
row->push_back(std::move(label));
return Status::OK();
@ -243,7 +243,8 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col
RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array));
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " +
RETURN_STATUS_UNEXPECTED("Invalid column type, column type of " + data_schema_->Column(col_num).Name() +
" should be float32 or float64, but got " +
data_schema_->Column(col_num).Type().ToString());
}
row->push_back(std::move(float_array));
@ -323,7 +324,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
std::ifstream file_handle(folder_path_ + file);
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file);
RETURN_STATUS_UNEXPECTED("Invalid json file, " + folder_path_ + file + " does not exist or permission denied.");
}
std::string line;
while (getline(file_handle, line)) {
@ -342,7 +343,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
}
} catch (const std::exception &err) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file);
RETURN_STATUS_UNEXPECTED("Invalid file, " + folder_path_ + file + " load failed: " + std::string(err.what()));
}
}
file_handle.close();

View File

@ -60,16 +60,16 @@ Status CelebAOp::ParseAttrFile() {
auto realpath = FileUtils::GetRealPath((folder_path / "list_attr_celeba.txt").ToString().data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << (folder_path / "list_attr_celeba.txt").ToString();
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" +
(folder_path / "list_attr_celeba.txt").ToString());
MS_LOG(ERROR) << "Invalid file path, " << (folder_path / "list_attr_celeba.txt").ToString() << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + (folder_path / "list_attr_celeba.txt").ToString() +
" does not exist.");
}
std::ifstream attr_file(realpath.value());
if (!attr_file.is_open()) {
std::string attr_file_name = (folder_path / "list_attr_celeba.txt").ToString();
return Status(StatusCode::kMDFileNotExist, __LINE__, __FILE__,
"Invalid file, failed to open Celeba attr file: " + attr_file_name);
"Invalid attr file, failed to open: " + attr_file_name + ", permission denied.");
}
attr_file_ = (folder_path / "list_attr_celeba.txt").ToString();
@ -89,12 +89,11 @@ Status CelebAOp::ParseAttrFile() {
try {
num_rows_in_attr_file_ = static_cast<int64_t>(std::stoul(rows_num)); // First line is rows number in attr file
} catch (std::invalid_argument &e) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, failed to convert rows_num from attr_file to unsigned long, invalid value: " + rows_num + ".");
RETURN_STATUS_UNEXPECTED("Invalid rows_num, failed to convert rows_num: " + rows_num + " to unsigned long in " +
attr_file_ + ".");
} catch (std::out_of_range &e) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, failed to convert rows_num from attr_file to unsigned long, value out of range: " + rows_num +
".");
RETURN_STATUS_UNEXPECTED("Invalid rows_num, rows_num in " + attr_file_ + " is out of range, rows_num is " +
rows_num + ".");
}
(void)getline(attr_file, attr_name); // Second line is attribute name,ignore it
@ -125,8 +124,8 @@ bool CelebAOp::CheckDatasetTypeValid() {
Path folder_path(folder_path_);
partition_file_.open((folder_path / "list_eval_partition.txt").ToString());
if (!partition_file_.is_open()) {
MS_LOG(ERROR) << "Invalid file, fail to open CelebA partition file, path="
<< (folder_path / "list_eval_partition.txt").ToString();
MS_LOG(ERROR) << "Invalid eval partition file, failed to open eval partition file: "
<< (folder_path / "list_eval_partition.txt").ToString() << " does not exist or permission denied.";
return false;
}
}
@ -140,10 +139,12 @@ bool CelebAOp::CheckDatasetTypeValid() {
try {
type = std::stoi(vec[1]);
} catch (std::invalid_argument &e) {
MS_LOG(WARNING) << "Invalid data, failed to convert to int, invalid value: " << vec[1] << ".";
MS_LOG(WARNING) << "Invalid number, the second word in list_eval_partition.txt should be numeric, but got: "
<< vec[1] << ".";
return false;
} catch (std::out_of_range &e) {
MS_LOG(WARNING) << "Invalid data, failed to convert to int, value out of range: " << vec[1] << ".";
MS_LOG(WARNING) << "Invalid number, the second word in list_eval_partition.txt is out of range, word is: " << vec[1]
<< ".";
return false;
}
// train:0, valid=1, test=2
@ -185,12 +186,11 @@ Status CelebAOp::PrepareData() {
try {
value = std::stoi(split[label_index]);
} catch (std::invalid_argument &e) {
RETURN_STATUS_UNEXPECTED("Invalid data, failed to convert item from attr_file to int, corresponding value: " +
split[label_index] + ".");
RETURN_STATUS_UNEXPECTED("Invalid label index, the label index in " + file_path.ToString() +
" should be numeric, but got: " + split[label_index] + ".");
} catch (std::out_of_range &e) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, failed to convert item from attr_file to int as out of range, corresponding value: " +
split[label_index] + ".");
RETURN_STATUS_UNEXPECTED("Invalid label index, the label index in " + file_path.ToString() +
" is out of range, index is " + split[label_index] + ".");
}
image_labels.second.push_back(value);
}
@ -242,7 +242,8 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
image = nullptr;
std::string err_msg = "Invalid data, failed to decode image: " + image_path.ToString();
std::string err_msg =
"Invalid image, " + image_path.ToString() + " decode failed, the image is broken or permission denied.";
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
}

View File

@ -113,7 +113,7 @@ Status CifarOp::ReadCifar10BlockData() {
// check the validity of the file path
Path file_path(file);
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to find cifar10 file: " + file);
"Invalid cifar10 file, " + file + " does not exist or is a directory.");
std::string file_name = file_path.Basename();
if (usage_ == "train") {
@ -125,12 +125,12 @@ Status CifarOp::ReadCifar10BlockData() {
}
std::ifstream in(file, std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file +
", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
in.is_open(), "Invalid cifar10 file, failed to open " + file + ", the file is damaged or permission denied.");
for (uint32_t index = 0; index < num_cifar10_records / kCifarBlockImageNum; ++index) {
(void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char));
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar10 file: " + file +
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid cifar10 file, failed to read data from: " + file +
", re-download dataset(make sure it is CIFAR-10 binary version).");
(void)cifar_raw_data_block_->EmplaceBack(image_data);
// Add file path info
@ -155,7 +155,7 @@ Status CifarOp::ReadCifar100BlockData() {
// check the validity of the file path
Path file_path(file);
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to find cifar100 file: " + file);
"Invalid cifar100 file, " + file + " does not exist or is a directory.");
std::string file_name = file_path.Basename();
// if usage is train/test, get only these 2 files
@ -167,16 +167,16 @@ Status CifarOp::ReadCifar100BlockData() {
} else if (file_name.find("train") != std::string::npos) {
num_cifar100_records = num_cifar100_train_records;
} else {
RETURN_STATUS_UNEXPECTED("Invalid file, Cifar100 train/test file not found in: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid cifar100 file, Cifar100 train/test file is missing in: " + file_name);
}
std::ifstream in(file, std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file +
", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
in.is_open(), "Invalid cifar100 file, failed to open " + file + ", the file is damaged or permission denied.");
for (uint32_t index = 0; index < num_cifar100_records / kCifarBlockImageNum; index++) {
(void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char));
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar100 file: " + file +
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid cifar100 file, failed to read data from: " + file +
", re-download dataset(make sure it is CIFAR-100 binary version).");
(void)cifar_raw_data_block_->EmplaceBack(image_data);
// Add file path info
@ -200,10 +200,10 @@ Status CifarOp::GetCifarFiles() {
}
}
} else {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open directory: " + dir_path.ToString() +
", make sure file not damaged or permission denied.");
RETURN_STATUS_UNEXPECTED("Invalid directory, " + dir_path.ToString() + " is not a directory or permission denied.");
}
CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(), "Invalid file, no .bin files found under " + folder_path_);
CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(),
"Invalid cifar folder, cifar(.bin) files are missing under " + folder_path_);
std::sort(cifar_files_.begin(), cifar_files_.end());
return Status::OK();
}
@ -306,9 +306,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,
constexpr int64_t num_cifar10_records = 10000;
for (auto &file : op->cifar_files_) {
Path file_path(file);
CHECK_FAIL_RETURN_UNEXPECTED(
file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to open cifar10 file: " + file + ", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid cifar10 file, " + file + " does not exist or is a directory.");
std::string file_name = file_path.Basename();
if (op->usage_ == "train") {
@ -321,8 +320,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,
std::ifstream in(file, std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file +
", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
in.is_open(), "Invalid cifar10 file, failed to open " + file + ", the file is damaged or permission denied.");
*count = *count + num_cifar10_records;
}
return Status::OK();
@ -334,9 +333,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,
Path file_path(file);
std::string file_name = file_path.Basename();
CHECK_FAIL_RETURN_UNEXPECTED(
file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to find cifar100 file: " + file + ", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid cifar100 file, " + file + " does not exist or is a directory.");
if (op->usage_ == "train" && file_path.Basename().find("train") == std::string::npos) continue;
if (op->usage_ == "test" && file_path.Basename().find("test") == std::string::npos) continue;
@ -347,8 +345,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,
num_cifar100_records += kCifar100RecordsPerTrainFile;
}
std::ifstream in(file, std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file +
", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
in.is_open(), "Invalid cifar100 file, failed to open " + file + ", the file is damaged or permission denied.");
}
*count = num_cifar100_records;
return Status::OK();

View File

@ -56,7 +56,8 @@ Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
} else {
std::ifstream file_handle(data.second);
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + data.second);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + data.second +
                             ", the json file is damaged or permission denied.");
}
std::string contents((std::istreambuf_iterator<char>(file_handle)), std::istreambuf_iterator<char>());
nlohmann::json contents_js = nlohmann::json::parse(contents);
@ -71,13 +72,15 @@ Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.first;
std::string err =
"Invalid image, failed to decode " + data.first + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
if (task_ != taskSuffix) {
Status rc_t = Decode(task, &task);
if (rc_t.IsError()) {
std::string err_t = "Invalid data, failed to decode image: " + data.second;
std::string err_t =
"Invalid image, failed to decode " + data.second + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err_t);
}
}
@ -106,8 +109,8 @@ void CityscapesOp::Print(std::ostream &out, bool show_all) const {
Status CityscapesOp::PrepareData() {
auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data());
if (!real_dataset_dir.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
MS_LOG(ERROR) << "Invalid file path, Cityscapes Dataset dir: " << dataset_dir_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, Cityscapes Dataset dir: " + dataset_dir_ + " does not exist.");
}
Path dataset_dir(real_dataset_dir.value());
@ -143,15 +146,18 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con
Path images_dir_p(images_dir);
if (!images_dir_p.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + images_dir_p.ToString() + " is an invalid directory path.");
RETURN_STATUS_UNEXPECTED("Invalid path, Cityscapes Dataset image dir: " + images_dir_p.ToString() +
" is not a directory path.");
}
Path task_dir_p(task_dir);
if (!task_dir_p.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + task_dir_p.ToString() + " is an invalid directory path.");
RETURN_STATUS_UNEXPECTED("Invalid path, Cityscapes Dataset task dir: " + task_dir_p.ToString() +
" is not a directory path.");
}
std::shared_ptr<Path::DirIterator> d_it = Path::DirIterator::OpenDirectory(&images_dir_p);
if (d_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + images_dir_p.ToString());
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open Cityscapes Dataset image directory: " +
images_dir_p.ToString());
}
while (d_it->HasNext()) {
@ -165,7 +171,8 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con
Path task_city_dir = task_dir_p / city_dir.Basename();
std::shared_ptr<Path::DirIterator> img_city_it = Path::DirIterator::OpenDirectory(&img_city_dir);
if (img_city_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + img_city_dir.ToString());
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open Cityscapes Dataset image city directory: " +
img_city_dir.ToString());
}
while (img_city_it->HasNext()) {
@ -179,13 +186,15 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con
Path task_file_path = task_city_dir / (img_file_name.substr(0, img_file_name.find("_leftImg8bit")) + "_" +
GetTaskSuffix(task_, real_quality_mode));
if (!task_file_path.Exists()) {
RETURN_STATUS_UNEXPECTED("Invalid file, " + task_file_path.ToString() + " not found.");
RETURN_STATUS_UNEXPECTED("Invalid file, Cityscapes Dataset task file: " + task_file_path.ToString() +
" does not exist.");
}
image_task_map_[image_file_path.ToString()] = task_file_path.ToString();
}
} catch (const std::exception &err) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset: " + dataset_dir_);
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset from " + dataset_dir_ + ": " +
std::string(err.what()));
}
}
@ -213,7 +222,9 @@ Status CityscapesOp::CountDatasetInfo() {
num_rows_ = static_cast<int64_t>(image_task_pairs_.size());
if (num_rows_ == 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, no valid data matching the dataset API CityscapesDataset. Please check file path or dataset API.");
"Invalid data, no valid data matching the dataset API 'CityscapesDataset'. Please check dataset API or file "
"path: " +
dataset_dir_ + ".");
}
return Status::OK();
}

View File

@ -55,7 +55,7 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector<std::string> key_c
if (cursor.find(key_chain[i]) != cursor.end()) {
cursor = cursor[key_chain[i]];
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, in given JSON file, failed to find key: " + key_chain[i]);
RETURN_STATUS_UNEXPECTED("Invalid json file, failed to find key: " + key_chain[i]);
}
}
std::string final_str = key_chain.back();
@ -84,13 +84,13 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector<std::string> key_c
Status ClueOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file);
std::string err_msg = "Invalid file path, " + file + " does not exist.";
LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
std::ifstream handle(realpath.value());
if (!handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + file);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file + ", the file is damaged or permission denied.");
}
int64_t rows_total = 0;
@ -115,7 +115,7 @@ Status ClueOp::LoadFile(const std::string &file, int64_t start_offset, int64_t e
js = nlohmann::json::parse(line);
} catch (const std::exception &err) {
// Catch any exception and convert to Status return code
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse JSON file: " + file);
RETURN_STATUS_UNEXPECTED("Invalid json, failed to parse " + file + ", " + std::string(err.what()));
}
int cols_count = cols_to_keyword_.size();
TensorRow t_row(cols_count, nullptr);
@ -219,7 +219,7 @@ Status ClueOp::CalculateNumRowsPerShard() {
}
std::string file_list = ss.str();
RETURN_STATUS_UNEXPECTED(
"Invalid data, CLUEDataset API can't read the data file (interface mismatch or no data found). "
"Invalid data, 'CLUEDataset' API can't read the data file (interface mismatch or no data found). "
"Check file path:" +
file_list);
}
@ -232,13 +232,13 @@ Status ClueOp::CalculateNumRowsPerShard() {
int64_t CountTotalRowsPerFile(const std::string &file) {
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << file;
MS_LOG(ERROR) << "Invalid file, " << file << " does not exist.";
return 0;
}
std::ifstream handle(realpath.value());
if (!handle.is_open()) {
MS_LOG(ERROR) << "Invalid file, failed to open file: " << file;
MS_LOG(ERROR) << "Invalid file, failed to open " << file << ": the file is damaged or permission denied.";
return 0;
}

View File

@ -78,8 +78,8 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
std::shared_ptr<Tensor> image, coordinate;
auto itr = coordinate_map_.find(image_id);
if (itr == coordinate_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id +
" is missing from image node in annotation file: " + annotation_path_);
}
std::string kImageFile = image_folder_path_ + std::string("/") + image_id;
@ -115,7 +115,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
} else if (task_type_ == TaskType::Panoptic) {
RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow));
} else {
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff or Panoptic.");
RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic.");
}
return Status::OK();
@ -128,8 +128,8 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
std::vector<uint32_t> iscrowd_row;
auto itr_item = simple_item_map_.find(image_id);
if (itr_item == simple_item_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id +
" is missing in the node of image from annotation file: " + annotation_path_ + ".");
}
std::vector<uint32_t> annotation = itr_item->second;
@ -153,7 +153,7 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
std::string img_id;
size_t pos = image_id.find(".");
if (pos == std::string::npos) {
RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\"");
RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\"");
}
std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id));
std::shared_ptr<Tensor> filename;
@ -171,8 +171,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
std::vector<uint32_t> item_queue;
auto itr_item = simple_item_map_.find(image_id);
if (itr_item == simple_item_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id +
" is missing in the node of 'image' from annotation file: " + annotation_path_);
}
item_queue = itr_item->second;
@ -186,7 +186,7 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
std::string img_id;
size_t pos = image_id.find(".");
if (pos == std::string::npos) {
RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\"");
RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\"");
}
std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id));
std::shared_ptr<Tensor> filename;
@ -206,8 +206,8 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
std::vector<uint32_t> area_row;
auto itr_item = simple_item_map_.find(image_id);
if (itr_item == simple_item_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id +
" is missing in the node of 'image' from annotation file: " + annotation_path_);
}
std::vector<uint32_t> annotation = itr_item->second;
@ -237,7 +237,7 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
std::string img_id;
size_t pos = image_id.find(".");
if (pos == std::string::npos) {
RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\"");
RETURN_STATUS_UNEXPECTED("Invalid image, " + image_id + " should be with suffix like \".jpg\"");
}
std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id));
std::shared_ptr<Tensor> filename;
@ -252,7 +252,9 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
template <typename T>
Status CocoOp::SearchNodeInJson(const nlohmann::json &input_tree, std::string node_name, T *output_node) {
auto node = input_tree.find(node_name);
CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid data, required node not found in JSON: " + node_name);
CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid annotation, the attribute of '" + node_name +
"' is missing in annotation file: " + annotation_path_ +
".");
(*output_node) = *node;
return Status::OK();
}
@ -262,17 +264,19 @@ Status CocoOp::PrepareData() {
try {
auto realpath = FileUtils::GetRealPath(annotation_path_.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << annotation_path_;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + annotation_path_);
std::string err_msg = "Invalid file path, Coco Dataset annotation file: " + annotation_path_ + " does not exist.";
LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
std::ifstream in(realpath.value());
if (!in.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open annotation file: " + annotation_path_);
RETURN_STATUS_UNEXPECTED("Invalid annotation file, Coco Dataset annotation file: " + annotation_path_ +
" open failed, permission denied!");
}
in >> js;
} catch (const std::exception &err) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open JSON file: " + annotation_path_ + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation file, Coco Dataset annotation file:" + annotation_path_ +
" load failed, error description: " + std::string(err.what()));
}
std::vector<std::string> image_que;
@ -292,8 +296,8 @@ Status CocoOp::PrepareData() {
RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonAnnoImageId), &image_id));
auto itr_file = image_index_.find(image_id);
if (itr_file == image_index_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + std::to_string(image_id) +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + std::to_string(image_id) +
" is missing in the node of 'image' from annotation file: " + annotation_path_);
}
file_name = itr_file->second;
switch (task_type_) {
@ -313,7 +317,7 @@ Status CocoOp::PrepareData() {
RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id));
break;
default:
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic.");
RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic.");
}
}
for (auto img : image_que) {
@ -322,7 +326,7 @@ Status CocoOp::PrepareData() {
num_rows_ = image_ids_.size();
if (num_rows_ == 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, CocoDataset API can't read the data file (interface mismatch or no data found). "
"Invalid data, 'CocoDataset' API can't read the data file (interface mismatch or no data found). "
"Check file in directory: " +
image_folder_path_ + ".");
}
@ -331,7 +335,8 @@ Status CocoOp::PrepareData() {
Status CocoOp::ImageColumnLoad(const nlohmann::json &image_tree, std::vector<std::string> *image_vec) {
if (image_tree.size() == 0) {
RETURN_STATUS_UNEXPECTED("Invalid data, no \"image\" node found in JSON file: " + annotation_path_ + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'image' node is missing in annotation file: " + annotation_path_ +
".");
}
for (auto img : image_tree) {
std::string file_name;
@ -354,8 +359,8 @@ Status CocoOp::DetectionColumnLoad(const nlohmann::json &annotation_tree, const
RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCategoryId), &category_id));
auto search_category = category_set_.find(category_id);
if (search_category == category_set_.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'category_id': " + std::to_string(category_id) +
" is missing in the node of 'categories' from annotation file: " + annotation_path_);
auto node_iscrowd = annotation_tree.find(kJsonAnnoIscrowd);
if (node_iscrowd != annotation_tree.end()) iscrowd = *node_iscrowd;
bbox.insert(bbox.end(), node_bbox.begin(), node_bbox.end());
@ -392,13 +397,13 @@ Status CocoOp::KeypointColumnLoad(const nlohmann::json &annotation_tree, const s
const int32_t &unique_id) {
auto itr_num_keypoint = annotation_tree.find(kJsonAnnoNumKeypoints);
if (itr_num_keypoint == annotation_tree.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no num_keypoint found in annotation file where image_id: " + std::to_string(unique_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'num_keypoint' node is missing in annotation file: " +
annotation_path_ + " where 'image_id': " + std::to_string(unique_id) + ".");
simple_item_map_[image_file].push_back(*itr_num_keypoint);
auto itr_keypoint = annotation_tree.find(kJsonAnnoKeypoints);
if (itr_keypoint == annotation_tree.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no keypoint found in annotation file where image_id: " + std::to_string(unique_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'keypoint' node is missing in annotation file: " +
annotation_path_ + " where 'image_id': " + std::to_string(unique_id) + ".");
coordinate_map_[image_file].push_back(*itr_keypoint);
return Status::OK();
}
@ -407,31 +412,34 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s
const int32_t &image_id) {
auto itr_segments = annotation_tree.find(kJsonAnnoSegmentsInfo);
if (itr_segments == annotation_tree.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no segments_info found in annotation file where image_id: " + std::to_string(image_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'segments_info' node is missing in annotation file: " +
annotation_path_ + " where 'image_id': " + std::to_string(image_id) + ".");
for (auto info : *itr_segments) {
std::vector<float> bbox;
uint32_t category_id = 0;
auto itr_bbox = info.find(kJsonAnnoBbox);
if (itr_bbox == info.end())
RETURN_STATUS_UNEXPECTED("Invalid data, no bbox found in segments_info(in annotation file) where image_id: " +
std::to_string(image_id) + ".");
RETURN_STATUS_UNEXPECTED(
"Invalid annotation, the 'bbox' attribute is missing in the node of 'segments_info' where 'image_id': " +
std::to_string(image_id) + " from annotation file: " + annotation_path_ + ".");
bbox.insert(bbox.end(), itr_bbox->begin(), itr_bbox->end());
coordinate_map_[image_file].push_back(bbox);
RETURN_IF_NOT_OK(SearchNodeInJson(info, std::string(kJsonAnnoCategoryId), &category_id));
auto search_category = category_set_.find(category_id);
if (search_category == category_set_.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'category_id': " + std::to_string(category_id) +
" is missing in the node of 'categories' from " + annotation_path_ + ".");
auto itr_iscrowd = info.find(kJsonAnnoIscrowd);
if (itr_iscrowd == info.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no iscrowd found in segments_info where image_id: " + std::to_string(image_id) + ".");
"Invalid annotation, the attribute of 'iscrowd' is missing in the node of 'segments_info' where 'image_id': " +
std::to_string(image_id) + " from annotation file: " + annotation_path_ + ".");
auto itr_area = info.find(kJsonAnnoArea);
if (itr_area == info.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no area found in segments_info where image_id: " + std::to_string(image_id) + ".");
"Invalid annotation, the attribute of 'area' is missing in the node of 'segments_info' where 'image_id': " +
std::to_string(image_id) + " from annotation file: " + annotation_path_ + ".");
simple_item_map_[image_file].push_back(category_id);
simple_item_map_[image_file].push_back(*itr_iscrowd);
simple_item_map_[image_file].push_back(*itr_area);
@ -441,7 +449,8 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s
Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {
if (categories_tree.size() == 0) {
RETURN_STATUS_UNEXPECTED("Invalid data, no categories found in annotation_path: " + annotation_path_);
RETURN_STATUS_UNEXPECTED(
"Invalid annotation, the 'categories' node is missing in annotation file: " + annotation_path_ + ".");
}
for (auto category : categories_tree) {
int32_t id = 0;
@ -449,7 +458,9 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {
std::vector<int32_t> label_info;
auto itr_id = category.find(kJsonId);
if (itr_id == category.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, no JSON id found in categories of " + annotation_path_);
RETURN_STATUS_UNEXPECTED(
"Invalid annotation, the attribute of 'id' is missing in the node of 'categories' from annotation file: " +
annotation_path_);
}
id = *itr_id;
label_info.push_back(id);
@ -458,13 +469,16 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {
auto itr_name = category.find(kJsonCategoriesName);
CHECK_FAIL_RETURN_UNEXPECTED(
itr_name != category.end(),
"Invalid data, no categories name found in categories where id: " + std::to_string(id));
"Invalid annotation, the attribute of 'name' is missing in the node of 'categories' where 'id': " +
std::to_string(id));
name = *itr_name;
if (task_type_ == TaskType::Panoptic) {
auto itr_isthing = category.find(kJsonCategoriesIsthing);
CHECK_FAIL_RETURN_UNEXPECTED(itr_isthing != category.end(),
"Invalid data, nothing found in categories of " + annotation_path_);
"Invalid annotation, the attribute of 'isthing' is missing in the node of "
"'categories' from annotation file: " +
annotation_path_);
label_info.push_back(*itr_isthing);
}
label_index_.emplace_back(std::make_pair(name, label_info));
@ -477,7 +491,8 @@ Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &c
if (decode_ == true) {
Status rc = Decode(*tensor, tensor);
CHECK_FAIL_RETURN_UNEXPECTED(rc.IsOk(), "Invalid data, failed to decode image: " + path);
CHECK_FAIL_RETURN_UNEXPECTED(
rc.IsOk(), "Invalid image, failed to decode " + path + ": the image is broken or permission denied.");
}
return Status::OK();
}
@ -505,8 +520,8 @@ Status CocoOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<i
RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
if ((*output_class_indexing).empty()) {
if ((task_type_ != TaskType::Detection) && (task_type_ != TaskType::Panoptic)) {
MS_LOG(ERROR) << "Invalid parameter, GetClassIndex only valid in \"Detection\" and \"Panoptic\" task.";
RETURN_STATUS_UNEXPECTED("Invalid parameter, GetClassIndex only valid in \"Detection\" and \"Panoptic\" task.");
MS_LOG(ERROR) << "Invalid task, only 'Detection' and 'Panoptic' task support GetClassIndex.";
RETURN_STATUS_UNEXPECTED("Invalid task, only 'Detection' and 'Panoptic' task support GetClassIndex.");
}
RETURN_IF_NOT_OK(PrepareData());
for (const auto &label : label_index_) {

View File

@ -111,7 +111,11 @@ int CsvOp::CsvParser::PutRecord(int c) {
std::string s = std::string(str_buf_.begin(), str_buf_.begin() + pos_);
std::shared_ptr<Tensor> t;
if (cur_col_ >= column_default_.size()) {
err_message_ = "Number of file columns does not match the default records";
std::stringstream ss;
ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', "
<< "but got the size of column_names: " << cur_col_
<< ", the size of column_defaults : " << column_default_.size() << ".";
err_message_ = ss.str();
return -1;
}
Status rc;
@ -139,7 +143,11 @@ int CsvOp::CsvParser::PutRecord(int c) {
break;
}
if (cur_col_ >= cur_row_.size()) {
err_message_ = "Number of file columns does not match the tensor table";
std::stringstream ss;
ss << "Invalid columns, the size of column_names should be greater than or equal to the size of columns of "
<< "loading data, but got the size of column_names: " << cur_col_
<< ", the size of columns in original loaded dataset: " << column_default_.size() << ".";
err_message_ = ss.str();
return -1;
}
cur_row_[cur_col_] = std::move(t);
@ -166,7 +174,11 @@ int CsvOp::CsvParser::PutRow(int c) {
}
if (cur_col_ != column_default_.size()) {
err_message_ = "The number of columns does not match the definition.";
std::stringstream ss;
ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', "
<< "but got the size of column_names: " << cur_col_
<< ", the size of 'column_defaults': " << column_default_.size() << ".";
err_message_ = ss.str();
return -1;
}
@ -201,11 +213,11 @@ int CsvOp::CsvParser::EndFile(int c) {
int CsvOp::CsvParser::CatchException(int c) {
if (GetMessage(c) == Message::MS_QUOTE && cur_state_ == State::UNQUOTE) {
err_message_ = "Invalid quote in unquote field.";
err_message_ = "Invalid csv file, unexpected quote in unquote field from " + file_path_ + ".";
} else if (GetMessage(c) == Message::MS_END_OF_FILE && cur_state_ == State::QUOTE) {
err_message_ = "Reach the end of file in quote field.";
err_message_ = "Invalid csv file, reach the end of file in quote field, check " + file_path_ + ".";
} else if (GetMessage(c) == Message::MS_NORMAL && cur_state_ == State::SECOND_QUOTE) {
err_message_ = "Receive unquote char in quote field.";
err_message_ = "Invalid csv file, receive unquote char in quote field, check " + file_path_ + ".";
}
return -1;
}
@ -459,14 +471,14 @@ Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t en
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << file;
RETURN_STATUS_UNEXPECTED("Invalid file, " + DatasetName() + " file get real path failed, path=" + file);
MS_LOG(ERROR) << "Invalid file path, " << file << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file + " does not exist.");
}
std::ifstream ifs;
ifs.open(realpath.value(), std::ifstream::in);
if (!ifs.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + " file: " + file);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file + ", the file is damaged or permission denied.");
}
if (column_name_list_.empty()) {
std::string tmp;
@ -483,17 +495,18 @@ Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t en
if (err != 0) {
// if error code is -2, the returned error is interrupted
if (err == -2) return Status(kMDInterrupted);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse file: " + file + ": line " +
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse csv file: " + file + " at line " +
std::to_string(csv_parser.GetTotalRows() + 1) +
". Error message: " + csv_parser.GetErrorMessage());
}
}
} catch (std::invalid_argument &ia) {
std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1);
RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ": line " + err_row + ", type does not match.");
RETURN_STATUS_UNEXPECTED("Invalid csv, csv file: " + file + " parse failed at line " + err_row +
", type does not match.");
} catch (std::out_of_range &oor) {
std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1);
RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ": line " + err_row + ", value out of range.");
RETURN_STATUS_UNEXPECTED("Invalid csv, " + file + " parse failed at line " + err_row + " : value out of range.");
}
return Status::OK();
}
@ -594,13 +607,14 @@ int64_t CsvOp::CountTotalRows(const std::string &file) {
CsvParser csv_parser(0, jagged_rows_connector_.get(), field_delim_, column_default_list_, file);
Status rc = csv_parser.InitCsvParser();
if (rc.IsError()) {
MS_LOG(ERROR) << "[Internal ERROR], failed to initialize " + DatasetName(true) + " Parser. Error:" << rc;
MS_LOG(ERROR) << "[Internal ERROR], failed to initialize " + DatasetName(true) + " Parser. Error description:"
<< rc;
return 0;
}
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << file;
MS_LOG(ERROR) << "Invalid file path, csv file: " << file << " does not exist.";
return 0;
}
@ -673,8 +687,8 @@ Status CsvOp::ComputeColMap() {
/* Process exception if ERROR in column name solving*/
if (!rc.IsOk()) {
MS_LOG(ERROR) << "Invalid file, fail to analyse column name map, path=" + csv_file;
RETURN_STATUS_UNEXPECTED("Invalid file, fail to analyse column name map, path=" + csv_file);
MS_LOG(ERROR) << "Invalid file, failed to get column name list from csv file: " + csv_file;
RETURN_STATUS_UNEXPECTED("Invalid file, failed to get column name list from csv file: " + csv_file);
}
}
} else {
@ -689,9 +703,10 @@ Status CsvOp::ComputeColMap() {
if (column_default_list_.size() != column_name_id_map_.size()) {
RETURN_STATUS_UNEXPECTED(
"Invalid parameter, the number of column names does not match the default column, size of default column_list: " +
"Invalid parameter, the size of column_names should be equal to the size of 'column_defaults', but got "
" size of 'column_defaults': " +
std::to_string(column_default_list_.size()) +
", size of column_name: " + std::to_string(column_name_id_map_.size()));
", size of column_names: " + std::to_string(column_name_id_map_.size()));
}
return Status::OK();
@ -703,7 +718,7 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) {
if (!check_flag_) {
auto realpath = FileUtils::GetRealPath(csv_file_name.data());
if (!realpath.has_value()) {
std::string err_msg = "Invalid file, " + DatasetName() + " file get real path failed, path=" + csv_file_name;
std::string err_msg = "Invalid file path, csv file: " + csv_file_name + " does not exist.";
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -721,11 +736,9 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) {
if (column_name_id_map_.find(col_names[i]) == column_name_id_map_.end()) {
column_name_id_map_[col_names[i]] = i;
} else {
MS_LOG(ERROR) << "Invalid parameter, duplicate column names are not allowed: " + col_names[i] +
", The corresponding data files: " + csv_file_name;
RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " + col_names[i] +
", The corresponding data files: " + csv_file_name);
MS_LOG(ERROR) << "Invalid parameter, duplicate column " << col_names[i] << " for csv file: " << csv_file_name;
RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column " + col_names[i] +
" for csv file: " + csv_file_name);
}
}
check_flag_ = true;
@ -736,11 +749,10 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) {
if (column_name_id_map_.find(column_name_list_[i]) == column_name_id_map_.end()) {
column_name_id_map_[column_name_list_[i]] = i;
} else {
MS_LOG(ERROR) << "Invalid parameter, duplicate column names are not allowed: " + column_name_list_[i] +
", The corresponding data files: " + csv_file_name;
RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " +
column_name_list_[i] + ", The corresponding data files: " + csv_file_name);
MS_LOG(ERROR) << "Invalid parameter, duplicate column " << column_name_list_[i]
<< " for csv file: " << csv_file_name << ".";
RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column " + column_name_list_[i] +
" for csv file: " + csv_file_name + ".");
}
}
check_flag_ = true;
@ -764,7 +776,7 @@ bool CsvOp::ColumnNameValidate() {
for (auto &csv_file : csv_files_list_) {
auto realpath = FileUtils::GetRealPath(csv_file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << csv_file;
MS_LOG(ERROR) << "Invalid file path, csv file: " << csv_file << " does not exist.";
return false;
}
@ -781,9 +793,8 @@ bool CsvOp::ColumnNameValidate() {
match_file = csv_file;
} else { // Case the other files
if (col_names != record) {
MS_LOG(ERROR)
<< "Invalid parameter, every corresponding column name must be identical, either element or permutation. "
<< "Invalid files are: " + match_file + " and " + csv_file;
MS_LOG(ERROR) << "Invalid parameter, every column name should be equal the record from csv, but got column: "
<< col_names << ", csv record: " << record << ". Check " + match_file + " and " + csv_file + ".";
return false;
}
}

View File

@ -76,13 +76,15 @@ Status DIV2KOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status hr_rc = Decode(hr_image, &hr_image);
if (hr_rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.first;
std::string err =
"Invalid image, failed to decode " + data.first + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
Status lr_rc = Decode(lr_image, &lr_image);
if (lr_rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.second;
std::string err =
"Invalid image, failed to decode " + data.second + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
}
@ -141,7 +143,7 @@ Status DIV2KOp::GetDIV2KLRDirRealName(const std::string &hr_dir_key, const std::
out_str += ("\t" + item.first + ": " + item.second + ",\n");
});
out_str += "\n}";
RETURN_STATUS_UNEXPECTED("Invalid param, " + lr_dir_key + " not found in DatasetPramMap: \n" + out_str);
RETURN_STATUS_UNEXPECTED("Invalid param, dir: " + lr_dir_key + " not found under div2k dataset dir, " + out_str);
}
if (downgrade_2017.find(downgrade_) != downgrade_2017.end() && scale_2017.find(scale_) != scale_2017.end()) {
@ -158,8 +160,8 @@ Status DIV2KOp::GetDIV2KDataByUsage() {
auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data());
if (!real_dataset_dir.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
MS_LOG(ERROR) << "Invalid file path, div2k dataset dir: " << dataset_dir_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, div2k dataset dir: " + dataset_dir_ + " does not exist.");
}
Path dataset_dir(real_dataset_dir.value());
@ -167,14 +169,15 @@ Status DIV2KOp::GetDIV2KDataByUsage() {
Path lr_images_dir = dataset_dir / lr_dir_real_name_;
if (!hr_images_dir.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + hr_images_dir.ToString() + " is an invalid directory path.");
RETURN_STATUS_UNEXPECTED("Invalid path, div2k hr image dir: " + hr_images_dir.ToString() + " is not a directory.");
}
if (!lr_images_dir.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + lr_images_dir.ToString() + " is an invalid directory path.");
RETURN_STATUS_UNEXPECTED("Invalid path, div2k lr image dir: " + lr_images_dir.ToString() + " is not a directory.");
}
auto hr_it = Path::DirIterator::OpenDirectory(&hr_images_dir);
if (hr_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + hr_images_dir.ToString());
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open div2k hr image dir: " + hr_images_dir.ToString() +
", permission denied.");
}
std::string image_name;
@ -202,12 +205,14 @@ Status DIV2KOp::GetDIV2KDataByUsage() {
Path lr_image_file_path(lr_image_file_path_);
if (!lr_image_file_path.Exists()) {
RETURN_STATUS_UNEXPECTED("Invalid file, " + lr_image_file_path.ToString() + " not found.");
RETURN_STATUS_UNEXPECTED("Invalid file, div2k image file: " + lr_image_file_path.ToString() +
" does not exist.");
}
image_hr_lr_map_[hr_image_file_path.ToString()] = lr_image_file_path.ToString();
} catch (const std::exception &err) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load DIV2K Dataset: " + dataset_dir_);
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load DIV2K Dataset from " + dataset_dir_ + ": " +
std::string(err.what()));
}
}
for (auto item : image_hr_lr_map_) {
@ -220,7 +225,8 @@ Status DIV2KOp::CountDatasetInfo() {
num_rows_ = static_cast<int64_t>(image_hr_lr_pairs_.size());
if (num_rows_ == 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, no valid data matching the dataset API DIV2KDataset. Please check file path or dataset API.");
"Invalid data, no valid data matching the dataset API 'DIV2KDataset'. Please check dataset API or file path: " +
dataset_dir_ + ".");
}
return Status::OK();
}

View File

@ -58,11 +58,12 @@ Status EMnistOp::WalkAllFiles() {
const std::string train_prefix = "-train";
const std::string test_prefix = "-test";
auto realpath = FileUtils::GetRealPath(folder_path_.data());
CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Get real path failed: " + folder_path_);
CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Invalid file path, " + folder_path_ + " does not exist.");
Path dir(realpath.value());
auto dir_it = Path::DirIterator::OpenDirectory(&dir);
if (dir_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + dir.ToString());
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open emnist dataset dir: " + dir.ToString() +
", the directory is not a directory or permission denied.");
}
std::string prefix;
prefix = "emnist-" + name_; // used to match usage == "all".
@ -88,7 +89,9 @@ Status EMnistOp::WalkAllFiles() {
std::sort(image_names_.begin(), image_names_.end());
std::sort(label_names_.begin(), label_names_.end());
CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(),
"Invalid data, num of images is not equal to num of labels.");
"Invalid data, num of image files should be equal to num of label files under " +
realpath.value() + ", but got num of images: " + std::to_string(image_names_.size()) +
", num of labels: " + std::to_string(label_names_.size()) + ".");
return Status::OK();
}
@ -118,12 +121,12 @@ Status EMnistOp::CountTotalRows(const std::string &dir, const std::string &name,
for (size_t i = 0; i < op->image_names_.size(); ++i) {
std::ifstream image_reader;
image_reader.open(op->image_names_[i], std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(),
"Invalid file, failed to open image file: " + op->image_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), "Invalid file, failed to open " + op->image_names_[i] +
": the image file is damaged or permission denied.");
std::ifstream label_reader;
label_reader.open(op->label_names_[i], std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(),
"Invalid file, failed to open label file: " + op->label_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), "Invalid file, failed to open " + op->label_names_[i] +
": the label file is damaged or permission denied.");
uint32_t num_images;
Status s = op->CheckImage(op->image_names_[i], &image_reader, &num_images);
image_reader.close();
@ -134,8 +137,10 @@ Status EMnistOp::CountTotalRows(const std::string &dir, const std::string &name,
label_reader.close();
RETURN_IF_NOT_OK(s);
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num of images is not equal to num of labels.");
CHECK_FAIL_RETURN_UNEXPECTED(
(num_images == num_labels),
"Invalid data, num of images should be equal to num of labels, but got num of images: " +
std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + ".");
*count = *count + num_images;
}

View File

@ -90,7 +90,8 @@ void FakeImageOp::Print(std::ostream &out, bool show_all) const {
Status FakeImageOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
if (cls_ids == nullptr || !cls_ids->empty() || label_list_.empty()) {
if (label_list_.empty()) {
RETURN_STATUS_UNEXPECTED("No image found in dataset. Check if image was generated successfully.");
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] No image found in dataset. Check if image was generated successfully.");
} else {
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Map for storing image-index pair is nullptr or has been set in other place, "
@ -126,7 +127,7 @@ Status FakeImageOp::PrepareData() {
label_list_.shrink_to_fit();
num_rows_ = label_list_.size();
CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "Generate image failed, please check dataset API.");
CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "Invalid data, generate fake data failed, please check dataset API.");
image_tensor_.clear();
image_tensor_.resize(num_rows_);
return Status::OK();

View File

@ -56,12 +56,12 @@ Status FashionMnistOp::CountTotalRows(const std::string &dir, const std::string
for (size_t i = 0; i < op->image_names_.size(); ++i) {
std::ifstream image_reader;
image_reader.open(op->image_names_[i], std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(),
"Invalid file, failed to open image file: " + op->image_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), "Invalid file, failed to open " + op->image_names_[i] +
": the image file is damaged or permission denied.");
std::ifstream label_reader;
label_reader.open(op->label_names_[i], std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(),
"Invalid file, failed to open label file: " + op->label_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), "Invalid file, failed to open " + op->label_names_[i] +
": the label file is damaged or permission denied.");
uint32_t num_images;
Status s = op->CheckImage(op->image_names_[i], &image_reader, &num_images);
image_reader.close();
@ -72,8 +72,10 @@ Status FashionMnistOp::CountTotalRows(const std::string &dir, const std::string
label_reader.close();
RETURN_IF_NOT_OK(s);
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num of images is not equal to num of labels.");
CHECK_FAIL_RETURN_UNEXPECTED(
(num_images == num_labels),
"Invalid data, num of images should be equal to num of labels, but got num of images: " +
std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + ".");
*count = *count + num_images;
}

View File

@ -48,7 +48,8 @@ Status FlickrOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.first;
std::string err =
"Invalid image, failed to decode " + data.first + ": the image is damaged or permission denied!";
RETURN_STATUS_UNEXPECTED(err);
}
}
@ -76,13 +77,14 @@ void FlickrOp::Print(std::ostream &out, bool show_all) const {
Status FlickrOp::PrepareData() {
auto real_file_path = FileUtils::GetRealPath(file_path_.data());
if (!real_file_path.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_path_;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_path_);
MS_LOG(ERROR) << "Invalid file path, " << file_path_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_path_ + " does not exist.");
}
std::ifstream file_handle(real_file_path.value());
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_);
RETURN_STATUS_UNEXPECTED("Invalid annotation file, failed to open " + file_path_ +
" : the file is damaged or permission denied.");
}
std::string line;
@ -102,16 +104,16 @@ Status FlickrOp::PrepareData() {
image_name = line.substr(0, flag_idx - 2); // -2 because "#[0-4]\t"
if (image_name.empty()) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, image_name is not found in Flickr annotation file: " + file_path_ +
"; line: " + line);
RETURN_STATUS_UNEXPECTED("Invalid file, the attribute of image_name is missing in flickr dataset file: " +
file_path_ + ", line: " + line);
}
image_file_path = (dataset_dir / image_name).ToString();
std::string annotation = line.substr(flag_idx + 1);
if (annotation.empty()) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, annotation is not found in Flickr annotation file: " + file_path_ +
"; line: " + line);
RETURN_STATUS_UNEXPECTED("Invalid file, the attribute of annotation is missing in flickr dataset file: " +
file_path_ + ", line: " + line);
}
bool valid = false;
@ -127,7 +129,8 @@ Status FlickrOp::PrepareData() {
image_annotation_map_[image_file_path].emplace_back(annotation);
} catch (const std::exception &err) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse flickr dataset file: " + file_path_ + ": " +
std::string(err.what()));
}
}
@ -146,8 +149,8 @@ Status FlickrOp::PrepareData() {
Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) {
auto real_file_name = FileUtils::GetRealPath(file_name.data());
if (!real_file_name.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name);
MS_LOG(ERROR) << "Invalid file path, flickr dataset file: " << file_name << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, flickr dataset file: " + file_name + " does not exist.");
}
std::ifstream file_handle;
@ -155,14 +158,16 @@ Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) {
*valid = false;
file_handle.open(real_file_name.value(), std::ios::binary | std::ios::in);
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid flickr file, failed to open " + file_name +
": the file is damaged or permission denied.");
}
unsigned char file_type[read_num];
(void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);
if (file_handle.fail()) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid flickr file, failed to read " + file_name +
": the file is damaged or the file content is incomplete.");
}
file_handle.close();
if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {

View File

@ -64,7 +64,7 @@ Status GeneratorOp::CreateGeneratorObject() {
// Acquire Python GIL
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized.");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized.");
}
try {
py::array sample_ids;
@ -93,15 +93,17 @@ Status GeneratorOp::Init() {
Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row) {
if (!py::isinstance<py::tuple>(py_data)) {
return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__,
"Invalid data, Generator should return a tuple of NumPy arrays, currently returned is not a tuple.");
"Invalid python function, the 'source' of 'GeneratorDataset' should return a tuple of NumPy arrays, "
"but got " +
std::string(py_data.get_type().str()));
}
py::tuple py_row = py_data.cast<py::tuple>();
// Check if returned number of columns matches with column names
if (py_row.size() != column_names_.size()) {
return Status(
StatusCode::kMDPyFuncException, __LINE__, __FILE__,
"Invalid data, Generator should return same number of NumPy arrays as specified in column_names, the size of"
" column_names is:" +
"Invalid python function, the 'source' of 'GeneratorDataset' should return same number of NumPy arrays as "
"specified in column_names, the size of column_names is:" +
std::to_string(column_names_.size()) +
" and number of returned NumPy array is:" + std::to_string(py_row.size()));
}
@ -110,15 +112,18 @@ Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row)
py::object ret_py_ele = py_row[i];
if (!py::isinstance<py::array>(ret_py_ele)) {
return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__,
"Invalid data, Generator should return a tuple of NumPy arrays. Ensure each item in tuple that "
"returned by source function of GeneratorDataset be NumPy array.");
"Invalid python function, 'GeneratorDataset' should return a tuple of NumPy arrays, but got " +
std::string(ret_py_ele.get_type().str()));
}
std::shared_ptr<Tensor> tensor;
RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast<py::array>(), &tensor));
if ((!column_types_.empty()) && (column_types_[i] != DataType::DE_UNKNOWN) &&
(column_types_[i] != tensor->type())) {
return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__,
"Invalid data, type of returned data in GeneratorDataset is not same with specified column_types.");
"Invalid python function, type of returned data in 'GeneratorDataset' should be the same as "
"specified column_types, but the type of returned data: " +
std::string(ret_py_ele.get_type().str()) +
", specified column type: " + column_types_[i].ToString());
}
tensor_row->push_back(tensor);
}
@ -173,7 +178,7 @@ Status GeneratorOp::operator()() {
{
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
#ifndef ENABLE_SECURITY

View File

@ -93,7 +93,8 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + folder_path_ + (pair_ptr->first);
std::string err = "Invalid image, " + folder_path_ + (pair_ptr->first) +
" decode failed, the image is broken or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
}
@ -121,7 +122,7 @@ void ImageFolderOp::Print(std::ostream &out, bool show_all) const {
Status ImageFolderOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) {
if (image_label_pairs_.empty()) {
RETURN_STATUS_UNEXPECTED("Invalid data, " + DatasetName(true) +
RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + DatasetName(true) +
"Dataset API can't read the data file(interface mismatch or no data found). Check " +
DatasetName() + " file path: " + folder_path_);
} else {
@ -156,7 +157,7 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) {
Path folder(folder_path_ + folder_name);
std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
if (folder.Exists() == false || dirItr == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + folder_name);
RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + folder_name + " does not exist or permission denied.");
}
std::set<std::string> imgs; // use this for ordering
while (dirItr->HasNext()) {
@ -193,8 +194,8 @@ Status ImageFolderOp::RecursiveWalkFolder(Path *dir) {
RETURN_IF_NOT_OK(folder_name_queue_->EmplaceBack(subdir.ToString().substr(dirname_offset_)));
}
if (recursive_ == true) {
MS_LOG(ERROR) << "RecursiveWalkFolder(&subdir) functionality is disabled permanently. No recursive walk of "
<< "directory will be performed.";
MS_LOG(ERROR) << "[Internal ERROR] RecursiveWalkFolder(&subdir) functionality is disabled permanently. "
<< "No recursive walk of directory will be performed.";
}
}
}
@ -206,7 +207,7 @@ Status ImageFolderOp::StartAsyncWalk() {
TaskManager::FindMe()->Post();
Path dir(folder_path_);
if (dir.Exists() == false || dir.IsDirectory() == false) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + folder_path_);
RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + folder_path_ + " may not exist or the path is not a directory.");
}
dirname_offset_ = folder_path_.length();
RETURN_IF_NOT_OK(RecursiveWalkFolder(&dir));
@ -242,10 +243,9 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
std::string err_msg = "";
int64_t row_cnt = 0;
err_msg += (dir.Exists() == false || dir.IsDirectory() == false)
? "Invalid parameter, input path is invalid or not set, path: " + path
? "Invalid dataset_dir, " + path + " does not exist or the path is not a directory. "
: "";
err_msg +=
(num_classes == nullptr && num_rows == nullptr) ? "Invalid parameter, num_class and num_rows are null.\n" : "";
err_msg += (num_classes == nullptr && num_rows == nullptr) ? "[Internal ERROR] num_class and num_rows are null." : "";
if (err_msg.empty() == false) {
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -266,7 +266,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
} else {
for (const auto &p : class_index) {
CHECK_FAIL_RETURN_UNEXPECTED(folder_names.find(p.first) != folder_names.end(),
"Invalid parameter, folder: " + p.first + " doesn't exist in " + path + " .");
"Invalid subdirectory, class: " + p.first + " doesn't exist in " + path + " .");
}
(*num_classes) = class_index.size();
}
@ -277,7 +277,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
Path subdir(folder_paths.front());
dir_itr = Path::DirIterator::OpenDirectory(&subdir);
if (subdir.Exists() == false || dir_itr == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.ToString());
RETURN_STATUS_UNEXPECTED("Invalid subdirectory, ImageFolder Dataset subdirectory: " + subdir.ToString() +
" does not exist or permission denied");
}
while (dir_itr->HasNext()) {
if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) {

View File

@ -64,7 +64,7 @@ FilenameBlock::FilenameBlock(IOBlockFlags io_block_flags)
// Gets the filename from the block using the provided index container
Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj<std::string> &index) const {
if (out_filename == nullptr) {
RETURN_STATUS_UNEXPECTED("Failed to get filename from FilenameBlock.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to get filename from FilenameBlock.");
}
// a FilenameBlock only has one key. Call base class method to fetch that key
@ -77,7 +77,7 @@ Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj<
auto &it = r.first;
*out_filename = it.value();
} else {
RETURN_STATUS_UNEXPECTED("Could not find filename from index.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Could not find filename from index.");
}
return Status::OK();

View File

@ -38,15 +38,15 @@ LJSpeechOp::LJSpeechOp(const std::string &file_dir, int32_t num_workers, int32_t
Status LJSpeechOp::PrepareData() {
auto real_path = FileUtils::GetRealPath(folder_path_.data());
if (!real_path.has_value()) {
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + folder_path_);
RETURN_STATUS_UNEXPECTED("Invalid file path, LJSpeech Dataset folder: " + folder_path_ + " does not exist.");
}
Path root_folder(real_path.value());
Path metadata_file_path = root_folder / "metadata.csv";
CHECK_FAIL_RETURN_UNEXPECTED(metadata_file_path.Exists() && !metadata_file_path.IsDirectory(),
"Invalid file, failed to find metadata file: " + metadata_file_path.ToString());
"Invalid file, failed to find LJSpeech metadata file: " + metadata_file_path.ToString());
std::ifstream csv_reader(metadata_file_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(csv_reader.is_open(),
"Invalid file, failed to open metadata file: " + metadata_file_path.ToString() +
"Invalid file, failed to open LJSpeech metadata file: " + metadata_file_path.ToString() +
", make sure file not damaged or permission denied.");
std::string line = "";
while (getline(csv_reader, line)) {
@ -64,8 +64,8 @@ Status LJSpeechOp::PrepareData() {
}
if (meta_info_list_.empty()) {
csv_reader.close();
RETURN_STATUS_UNEXPECTED(
"Reading failed, unable to read valid data from the metadata file: " + metadata_file_path.ToString() + ".");
RETURN_STATUS_UNEXPECTED("Reading failed, unable to read valid data from the LJSpeech metadata file: " +
metadata_file_path.ToString() + ".");
}
num_rows_ = meta_info_list_.size();
csv_reader.close();
@ -76,7 +76,7 @@ Status LJSpeechOp::PrepareData() {
// 1 function call produces 1 TensorTow
Status LJSpeechOp::LoadTensorRow(row_id_type index, TensorRow *trow) {
int32_t num_items = meta_info_list_.size();
CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < num_items, "The input index is out of range.");
CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < num_items, "[Internal ERROR] The input index is out of range.");
std::shared_ptr<Tensor> waveform;
std::shared_ptr<Tensor> sample_rate_scalar;
std::shared_ptr<Tensor> transcription, normalized_transcription;
@ -118,7 +118,7 @@ void LJSpeechOp::Print(std::ostream &out, bool show_all) const {
Status LJSpeechOp::CountTotalRows(const std::string &dir, int64_t *count) {
auto real_path = FileUtils::GetRealPath(dir.data());
if (!real_path.has_value()) {
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + dir);
RETURN_STATUS_UNEXPECTED("Invalid file, " + dir + " does not exist.");
}
Path root_folder(real_path.value());
Path metadata_file_path = root_folder / "metadata.csv";

View File

@ -63,7 +63,8 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.first;
std::string err =
"Invalid image, failed to decode: " + data.first + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
}
@ -91,7 +92,7 @@ void ManifestOp::Print(std::ostream &out, bool show_all) const {
Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) {
if (image_labelname_.empty()) {
RETURN_STATUS_UNEXPECTED("Invalid data, no image found in dataset.");
RETURN_STATUS_UNEXPECTED("Invalid manifest file, image data is missing in " + file_);
} else {
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Map for containing image-index pair is nullptr or has been set in other place,"
@ -120,13 +121,14 @@ Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids)
Status ManifestOp::PrepareData() {
auto realpath = FileUtils::GetRealPath(file_.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_;
RETURN_STATUS_UNEXPECTED("Invalid data, get real path failed, path=" + file_);
MS_LOG(ERROR) << "Invalid file path, " << file_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_ + " does not exist.");
}
std::ifstream file_handle(realpath.value());
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Manifest file: " + file_);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file_ +
": manifest file is damaged or permission denied!");
}
std::string line;
std::set<std::string> classes;
@ -137,7 +139,7 @@ Status ManifestOp::PrepareData() {
std::string image_file_path = js.value("source", "");
if (image_file_path == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, 'source' is not found in Manifest file: " + file_ + " at line " +
RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'source' is missing in file: " + file_ + " at line " +
std::to_string(line_count));
}
// If image is not JPEG/PNG/GIF/BMP, drop it
@ -149,7 +151,7 @@ Status ManifestOp::PrepareData() {
std::string usage = js.value("usage", "");
if (usage == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, 'usage' is not found in Manifest file: " + file_ + " at line " +
RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'usage' is missing in file: " + file_ + " at line " +
std::to_string(line_count));
}
(void)std::transform(usage.begin(), usage.end(), usage.begin(), ::tolower);
@ -164,7 +166,7 @@ Status ManifestOp::PrepareData() {
classes.insert(label_name);
if (label_name == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, 'name' of label is not found in Manifest file: " + file_ +
RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'name' attribute of label is missing in file: " + file_ +
" at line " + std::to_string(line_count));
}
if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) {
@ -180,7 +182,8 @@ Status ManifestOp::PrepareData() {
line_count++;
} catch (const std::exception &err) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + file_);
RETURN_STATUS_UNEXPECTED("Invalid manifest file, parse manifest file: " + file_ + " failed, " +
std::string(err.what()));
}
}
num_classes_ = classes.size();
@ -193,8 +196,8 @@ Status ManifestOp::PrepareData() {
Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) {
auto realpath = FileUtils::GetRealPath(file_name.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name);
MS_LOG(ERROR) << "Invalid file path, " << file_name << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_name + " does not exist.");
}
std::ifstream file_handle;
@ -202,14 +205,16 @@ Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) {
*valid = false;
file_handle.open(realpath.value(), std::ios::binary | std::ios::in);
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid manifest file, failed to open " + file_name +
" : the manifest file is damaged or permission denied.");
}
unsigned char file_type[read_num];
(void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);
if (file_handle.fail()) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid manifest file, failed to read " + file_name +
" : the manifest file is damaged or permission denied.");
}
file_handle.close();
if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {

View File

@ -68,7 +68,8 @@ Status MindRecordOp::Init() {
data_schema_ = std::make_unique<DataSchema>();
std::vector<std::string> col_names = shard_reader_->GetShardColumn()->GetColumnName();
CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), "Invalid data, no column names are specified.");
CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(),
"Invalid column, no column names are specified, check mindrecord file.");
std::vector<mindrecord::ColumnDataType> col_data_types = shard_reader_->GetShardColumn()->GeColumnDataType();
std::vector<std::vector<int64_t>> col_shapes = shard_reader_->GetShardColumn()->GetColumnShape();
@ -107,9 +108,8 @@ Status MindRecordOp::Init() {
if (!load_all_cols) {
std::unique_ptr<DataSchema> tmp_schema = std::make_unique<DataSchema>();
for (std::string colname : columns_to_load_) {
CHECK_FAIL_RETURN_UNEXPECTED(
colname_to_ind.find(colname) != colname_to_ind.end(),
"Invalid data, specified loading column name: " + colname + " does not exist in data file.");
CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(),
"Invalid column, " + colname + " does not exist in data file.");
RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname])));
}
data_schema_ = std::move(tmp_schema);
@ -177,7 +177,7 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) {
}
RETURN_IF_NOT_OK(worker_in_queues_[worker_id]->PopFront(&io_block));
}
RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Unexpected nullptr received in worker.");
}
Status MindRecordOp::GetRowFromReader(TensorRow *fetched_row, uint64_t row_id, int32_t worker_id) {
@ -231,14 +231,15 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
RETURN_IF_NOT_OK(shard_column->GetColumnFromJson(column_name, sample_json_, &data_ptr, &n_bytes));
} else if (category == mindrecord::ColumnInBlob) {
CHECK_FAIL_RETURN_UNEXPECTED(sample_bytes_.find(column_name) != sample_bytes_.end(),
"Invalid data, failed to retrieve blob data from padding sample.");
"Invalid padded_sample, failed to retrieve blob data from padding sample, "
"check 'padded_sample'.");
std::string ss(sample_bytes_[column_name]);
n_bytes = ss.size();
data_ptr = std::make_unique<unsigned char[]>(n_bytes);
std::copy(ss.begin(), ss.end(), data_ptr.get());
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, retrieved data type is unknown.");
RETURN_STATUS_UNEXPECTED("Invalid datatype, retrieved data type is unknown.");
}
if (data == nullptr) {
data = reinterpret_cast<const unsigned char *>(data_ptr.get());
@ -254,7 +255,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
DataType type = column.Type();
// Set shape
CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0.");
CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0,
"[Internal ERROR] Found memory size of column data type is 0.");
auto num_elements = n_bytes / column_data_type_size;
if (type == DataType::DE_STRING) {
std::string s{data, data + n_bytes};

View File

@ -128,7 +128,7 @@ class MindRecordOp : public MappableLeafOp {
const mindrecord::json &columns_json, const mindrecord::TaskType task_type);
Status LoadTensorRow(row_id_type row_id, TensorRow *row) override {
return Status(StatusCode::kMDSyntaxError, "Cannot call this method.");
return Status(StatusCode::kMDSyntaxError, "[Internal ERROR] Cannot call this method.");
}
// Private function for computing the assignment of the column name map.
// @return - Status

View File

@ -69,7 +69,7 @@ void MnistOp::Print(std::ostream &out, bool show_all) const {
Status MnistOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) {
if (image_label_pairs_.empty()) {
RETURN_STATUS_UNEXPECTED("Invalid data, no image found in " + DatasetName() + " file.");
RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, image data is missing.");
} else {
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Map for containing image-index pair is nullptr or has been set in other place,"
@ -89,7 +89,7 @@ Status MnistOp::ReadFromReader(std::ifstream *reader, uint32_t *result) {
uint32_t res = 0;
reader->read(reinterpret_cast<char *>(&res), 4);
CHECK_FAIL_RETURN_UNEXPECTED(!reader->fail(),
"Invalid data, failed to read 4 bytes from " + DatasetName() + " file.");
"Invalid file, failed to read 4 bytes from " + DatasetName() + " file.");
*result = SwapEndian(res);
return Status::OK();
}
@ -100,17 +100,22 @@ uint32_t MnistOp::SwapEndian(uint32_t val) const {
}
Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_reader, uint32_t *num_images) {
CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(),
"Invalid file, failed to open " + DatasetName() + " image file: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(), "Invalid " + DatasetName() + " file, failed to open " +
file_name + " : the file is damaged or permission denied.");
int64_t image_len = image_reader->seekg(0, std::ios::end).tellg();
(void)image_reader->seekg(0, std::ios::beg);
// The first 16 bytes of the image file are type, number, row and column
CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16,
"Invalid " + DatasetName() + " file, the first data length of " + file_name +
" should be 16 bytes(contains type, number, row and column), but got " +
std::to_string(image_len) + ".");
uint32_t magic_number;
RETURN_IF_NOT_OK(ReadFromReader(image_reader, &magic_number));
CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistImageFileMagicNumber,
"Invalid file, this is not the " + DatasetName() + " image file: " + file_name);
"Invalid " + DatasetName() + " file, the magic number of " + file_name + " should be " +
std::to_string(kMnistImageFileMagicNumber) + ", but got " +
std::to_string(magic_number));
uint32_t num_items;
RETURN_IF_NOT_OK(ReadFromReader(image_reader, &num_items));
@ -120,28 +125,38 @@ Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_re
RETURN_IF_NOT_OK(ReadFromReader(image_reader, &cols));
// The image size of the Mnist dataset is fixed at [28,28]
CHECK_FAIL_RETURN_UNEXPECTED((rows == kMnistImageRows) && (cols == kMnistImageCols),
"Invalid data, shape of image is not equal to (28, 28).");
"Invalid " + DatasetName() + " file, shape of image in " + file_name +
" should be (28, 28), but got (" + std::to_string(rows) + ", " + std::to_string(cols) +
").");
CHECK_FAIL_RETURN_UNEXPECTED((image_len - 16) == num_items * rows * cols,
"Invalid data, got truncated data len: " + std::to_string(image_len - 16) +
", which is not equal to real data len: " + std::to_string(num_items * rows * cols));
"Invalid " + DatasetName() + " file, truncated data length of " + file_name +
" should be " + std::to_string(num_items * rows * cols) + ", but got " +
std::to_string(image_len - 16));
*num_images = num_items;
return Status::OK();
}
Status MnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_reader, uint32_t *num_labels) {
CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(),
"Invalid file, failed to open " + DatasetName() + " label file: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), "Invalid " + DatasetName() + " file, failed to open " +
file_name + " : the file is damaged or permission denied!");
int64_t label_len = label_reader->seekg(0, std::ios::end).tellg();
(void)label_reader->seekg(0, std::ios::beg);
// The first 8 bytes of the image file are type and number
CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid " + DatasetName() + " file, the first data length of " +
file_name + " should be 8 bytes(contains type and number), but got " +
std::to_string(label_len) + ".");
uint32_t magic_number;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number));
CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistLabelFileMagicNumber,
"Invalid file, this is not the " + DatasetName() + " label file: " + file_name);
"Invalid " + DatasetName() + " file, the magic number of " + file_name +
" should be " + std::to_string(kMnistLabelFileMagicNumber) + ", but got " +
std::to_string(magic_number) + ".");
uint32_t num_items;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items));
CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid data, number of labels is wrong.");
CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid " + DatasetName() +
" file, the data length of labels in " + file_name +
" should be " + std::to_string(num_items) +
", but got " + std::to_string(label_len - 8) + ".");
*num_labels = num_items;
return Status::OK();
}
@ -151,7 +166,10 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images));
RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels));
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num_images is not equal to num_labels. Ensure data file is not damaged.");
"Invalid " + DatasetName() + " file, the images number of " + image_names_[index] +
" should be equal to the labels number of " + label_names_[index] +
", but got images number: " + std::to_string(num_images) +
", labels number: " + std::to_string(num_labels) + ".");
// The image size of the Mnist dataset is fixed at [28,28]
int64_t size = kMnistImageRows * kMnistImageCols;
auto images_buf = std::make_unique<char[]>(size * num_images);
@ -163,13 +181,13 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
}
(void)image_reader->read(images_buf.get(), size * num_images);
if (image_reader->fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " image: " + image_names_[index] +
", size:" + std::to_string(size * num_images) + ". Ensure data file is not damaged.");
RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, failed to read " + image_names_[index] +
" : the file is damaged or permission denied!");
}
(void)label_reader->read(labels_buf.get(), num_images);
if (label_reader->fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " label:" + label_names_[index] +
", size: " + std::to_string(num_images) + ". Ensure data file is not damaged.");
RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, failed to read " + label_names_[index] +
" : the file is damaged or the file content is incomplete.");
}
TensorShape img_tensor_shape = TensorShape({kMnistImageRows, kMnistImageCols, 1});
for (int64_t j = 0; j != num_images; ++j) {
@ -244,8 +262,10 @@ Status MnistOp::WalkAllFiles() {
std::sort(image_names_.begin(), image_names_.end());
std::sort(label_names_.begin(), label_names_.end());
CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(),
"Invalid data, num of images is not equal to num of labels.");
CHECK_FAIL_RETURN_UNEXPECTED(
image_names_.size() == label_names_.size(),
"Invalid " + DatasetName() + " file, num of images should be equal to num of labels, but got num of images: " +
std::to_string(image_names_.size()) + ", num of labels: " + std::to_string(label_names_.size()) + ".");
return Status::OK();
}
@ -279,7 +299,9 @@ Status MnistOp::CountTotalRows(const std::string &dir, const std::string &usage,
uint32_t num_labels;
RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels));
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num of images is not equal to num of labels.");
"Invalid " + op->DatasetName() +
" file, num of images should be equal to num of labels, but got num of images: " +
std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + ".");
*count = *count + num_images;
// Close the readers

View File

@ -215,7 +215,8 @@ bool NonMappableLeafOp::NeedPushFileToBlockQueue(const std::string &file_name, i
bool push = false;
int64_t start_index = device_id_ * num_rows_per_shard_;
if (device_id_ + 1 < 0) {
MS_LOG(ERROR) << "Device id is invalid, got " + std::to_string(device_id_);
MS_LOG(ERROR) << "Invalid device id, device id should be greater than or equal 0, but got "
<< std::to_string(device_id_);
return false;
}

View File

@ -141,7 +141,8 @@ Status PhotoTourOp::GetFileContent(const std::string &info_file, std::string *an
RETURN_UNEXPECTED_IF_NULL(ans);
std::ifstream reader;
reader.open(info_file);
CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open PhotoTour info file: " + info_file);
CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open " + info_file +
": PhotoTour info file is damaged or permission denied.");
(void)reader.seekg(0, std::ios::end);
std::size_t size = reader.tellg();
(void)reader.seekg(0, std::ios::beg);
@ -183,7 +184,9 @@ Status PhotoTourOp::ReadInfoFile(const std::string &data_dir, const std::string
switch (col_idx) {
case ID_3DPOINT: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour info file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, reading PhotoTour info file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
int id_3dpoint = std::atoi(item.c_str());
labels_.push_back(id_3dpoint);
col_idx = UNKNOWN;
@ -191,7 +194,9 @@ Status PhotoTourOp::ReadInfoFile(const std::string &data_dir, const std::string
}
case UNKNOWN: {
std::string item2 = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), "Reading PhotoTour info file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(
!item2.empty(), "Invalid data, Reading PhotoTour info file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content in file should not be empty.");
col_idx = ID_3DPOINT;
break;
}
@ -225,34 +230,44 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri
switch (col_idx) {
case PATCH_ID1: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid dataReading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
patch_id1 = std::atoi(item.c_str());
col_idx = LABEL1;
break;
}
case LABEL1: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, Reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
label1 = std::atoi(item.c_str());
col_idx = UNUSED1;
break;
}
case UNUSED1: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, Reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
col_idx = PATCH_ID2;
break;
}
case PATCH_ID2: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, Reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
patch_id2 = std::atoi(item.c_str());
col_idx = LABEL2;
break;
}
case LABEL2: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, Reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
label2 = std::atoi(item.c_str());
col_idx = UNUSED2;
matches_.push_back(std::make_tuple(patch_id1, patch_id2, uint32_t(label1 == label2)));
@ -260,13 +275,17 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri
}
case UNUSED2: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, Reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
col_idx = UNUSED3;
break;
}
case UNUSED3: {
std::string item2 = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(),
"Invalid data, Reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
col_idx = PATCH_ID1;
break;
}
@ -281,8 +300,9 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri
Status PhotoTourOp::GetPhotoTourDataTensor(uint32_t index, std::shared_ptr<Tensor> *image_tensor) {
RETURN_UNEXPECTED_IF_NULL(image_tensor);
CHECK_FAIL_RETURN_UNEXPECTED(index < kLens.at(name_),
"Index exceeds the maximum count of image, got: " + std::to_string(index));
CHECK_FAIL_RETURN_UNEXPECTED(
index < kLens.at(name_),
"[Internal ERROR] Index exceeds the maximum count of image, got: " + std::to_string(index));
int image_id = index / (kPatchNumPerRow * kPatchNumPerCol);
int row_in_image = (index % (kPatchNumPerRow * kPatchNumPerCol)) / kPatchNumPerRow;
@ -320,7 +340,7 @@ Status PhotoTourOp::PrepareData() {
chosen_dataset_folder_path_ = (Path(dataset_dir_) / Path(name_)).ToString();
train_ = kTrain.at(usage_);
auto real_folder_path = FileUtils::GetRealPath(chosen_dataset_folder_path_.data());
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + chosen_dataset_folder_path_);
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), chosen_dataset_folder_path_ + " does not exist.");
std::vector<cv::String> file_names;
cv::glob(real_folder_path.value(), file_names);

View File

@ -107,7 +107,8 @@ Status Places365Op::GetFileContent(const std::string &info_file, std::string *an
RETURN_UNEXPECTED_IF_NULL(ans);
std::ifstream reader;
reader.open(info_file);
CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open Places365 file: " + info_file);
CHECK_FAIL_RETURN_UNEXPECTED(
!reader.fail(), "Invalid file, failed to open " + info_file + ": Places365 file is damaged or permission denied.");
reader.seekg(0, std::ios::end);
std::size_t size = reader.tellg();
reader.seekg(0, std::ios::beg);
@ -153,21 +154,21 @@ Status Places365Op::LoadCategories(const std::string &category_meta_name) {
while ((pos = s.find(" ")) != std::string::npos) {
switch (col_idx) {
case CATEGORY: {
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(),
"Reading places365 category file failed: " + category_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " +
category_meta_name + ", space characters not found.");
category = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!category.empty(),
"Reading places365 category file failed: " + category_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(!category.empty(), "Invalid data, Reading places365 category file failed: " +
category_meta_name + ", space characters not found.");
// switch the type of substring.
col_idx = LABEL;
break;
}
case LABEL: {
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(),
"Reading places365 category file failed: " + category_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " +
category_meta_name + ", space characters not found.");
std::string label_item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!label_item.empty(),
"Reading places365 category file failed: " + category_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(!label_item.empty(), "Invalid data, Reading places365 category file failed: " +
category_meta_name + ", space characters not found.");
label = std::atoi(label_item.c_str());
// switch the type of substring.
col_idx = CATEGORY;
@ -204,19 +205,21 @@ Status Places365Op::LoadFileLists(const std::string &filelists_meta_name) {
while ((pos = s.find(" ")) != std::string::npos) {
switch (col_idx) {
case PATH: {
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(),
"Reading places365 category file failed: " + filelists_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " +
filelists_meta_name + ", space characters not found.");
path = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!path.empty(), "Reading places365 filelist file failed: " + filelists_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(!path.empty(), "Invalid data, Reading places365 filelist file failed: " +
filelists_meta_name + ", space characters not found.");
// switch the type of substring.
col_idx = LABEL;
break;
}
case LABEL: {
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(),
"Reading places365 category file failed: " + filelists_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, Reading places365 category file failed: " +
filelists_meta_name + ", space characters not found.");
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading places365 filelist file failed: " + filelists_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Invalid data, Reading places365 filelist file failed: " +
filelists_meta_name + ", space characters not found.");
label = std::atoi(item.c_str());
// switch the type of substring.
col_idx = PATH;
@ -233,13 +236,15 @@ Status Places365Op::LoadFileLists(const std::string &filelists_meta_name) {
Status Places365Op::GetPlaces365DataTensor(uint32_t index, std::shared_ptr<Tensor> *image_tensor) {
std::string file_path = image_path_label_pairs_[index].first;
CHECK_FAIL_RETURN_UNEXPECTED(Path(file_path).Exists(), file_path + " File not exists.");
CHECK_FAIL_RETURN_UNEXPECTED(Path(file_path).Exists(),
"Invalid file path, Places365 image: " + file_path + " does not exists.");
RETURN_IF_NOT_OK(Tensor::CreateFromFile(file_path, image_tensor));
if (decode_) {
Status rc = Decode(*image_tensor, image_tensor);
if (rc.IsError()) {
*image_tensor = nullptr;
std::string err_msg = "Invalid data, failed to decode image: " + file_path;
std::string err_msg =
"Invalid image, failed to decode " + file_path + ": the image is damaged or permission denied.";
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
}
@ -249,14 +254,15 @@ Status Places365Op::GetPlaces365DataTensor(uint32_t index, std::shared_ptr<Tenso
Status Places365Op::PrepareData() {
auto real_folder_path = FileUtils::GetRealPath(root_.data());
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + root_);
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Invalid file path, " + root_ + " does not exist.");
RETURN_IF_NOT_OK(LoadCategories((Path(real_folder_path.value()) / Path(kCategoriesMeta)).ToString()));
RETURN_IF_NOT_OK(LoadFileLists((Path(real_folder_path.value()) / Path(kFileListMeta.at(usage_))).ToString()));
num_rows_ = image_path_label_pairs_.size();
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0,
"Invalid data, no valid data matching the dataset API Places365Dataset. Please check file path or dataset API.");
"Invalid data, no valid data matching the dataset API Places365Dataset. Please check dataset API or file path: " +
root_ + ".");
return Status::OK();
}
@ -281,7 +287,7 @@ Status Places365Op::CountTotalRows(const std::string &dir, const std::string &us
for (size_t i = 0; i < op->image_path_label_pairs_.size(); ++i) {
CHECK_FAIL_RETURN_UNEXPECTED(Path(op->image_path_label_pairs_[i].first).Exists(),
op->image_path_label_pairs_[i].first + " File not exists.");
"Invalid file path, " + op->image_path_label_pairs_[i].first + " does not exists.");
}
*count = op->image_path_label_pairs_.size();
return Status::OK();

View File

@ -111,7 +111,9 @@ Status QMnistOp::CountTotalRows(const std::string &dir, const std::string &usage
uint32_t num_labels;
RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels));
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num of images is not equal to num of labels.");
"Invalid data, num of images should be equal to num of labels loading from " + dir +
", but got num of images: " + std::to_string(num_images) +
", num of labels: " + std::to_string(num_labels) + ".");
if (usage == "test10k") {
// only use the first 10k samples and drop the last 50k samples
@ -141,7 +143,8 @@ Status QMnistOp::WalkAllFiles() {
const std::string nist_prefix = "xnist";
auto real_folder_path = FileUtils::GetRealPath(folder_path_.data());
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + folder_path_);
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(),
"Invalid QMnist folder, " + folder_path_ + " does not exist or permission denied!");
Path root_dir(real_folder_path.value());
if (usage_ == "train") {
@ -162,20 +165,25 @@ Status QMnistOp::WalkAllFiles() {
label_names_.push_back((root_dir / Path(nist_prefix + "-" + label_ext)).ToString());
}
CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(),
"Invalid data, num of images is not equal to num of labels.");
CHECK_FAIL_RETURN_UNEXPECTED(
image_names_.size() == label_names_.size(),
"Invalid data, num of Qmnist image files should be equal to num of Qmnist label files under directory:" +
folder_path_ + ", but got num of image files: " + std::to_string(image_names_.size()) +
", num of label files: " + std::to_string(label_names_.size()) + ".");
for (size_t i = 0; i < image_names_.size(); i++) {
Path file_path(image_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Failed to find " + DatasetName() + " image file: " + file_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
file_path.Exists() && !file_path.IsDirectory(),
"Invalid file path, Qmnist data file: " + file_path.ToString() + " does not exist or is a directory.");
MS_LOG(INFO) << DatasetName(true) << " operator found image file at " << file_path.ToString() << ".";
}
for (size_t i = 0; i < label_names_.size(); i++) {
Path file_path(label_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Failed to find " + DatasetName() + " label file: " + file_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
file_path.Exists() && !file_path.IsDirectory(),
"Invalid file path, Qmnist data file: " + file_path.ToString() + " does not exist or is a directory.");
MS_LOG(INFO) << DatasetName(true) << " operator found label file at " << file_path.ToString() << ".";
}
@ -189,7 +197,9 @@ Status QMnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *l
RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images));
RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels));
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num_images is not equal to num_labels. Ensure data file is not damaged.");
"Invalid data, num of images should be equal to num of labels loading from " +
folder_path_ + ", but got num of images: " + std::to_string(num_images) +
", num of labels: " + std::to_string(num_labels) + ".");
// The image size of the QMNIST dataset is fixed at [28,28]
int64_t image_size = kQMnistImageRows * kQMnistImageCols;
@ -216,16 +226,16 @@ Status QMnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *l
}
(void)image_reader->read(images_buf.get(), image_size * num_images);
if (image_reader->fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " image: " + image_names_[index] +
", size:" + std::to_string(image_size * num_images) +
". Ensure data file is not damaged.");
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + std::to_string(image_size * num_images) +
" bytes from " + image_names_[index] +
": the data file is damaged or the content is incomplete.");
}
// uint32_t use 4 bytes in memory
(void)label_reader->read(reinterpret_cast<char *>(labels_buf.get()), label_length * num_labels * 4);
if (label_reader->fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " label:" + label_names_[index] +
", size: " + std::to_string(label_length * num_labels) +
". Ensure data file is not damaged.");
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + std::to_string(label_length * num_labels * 4) +
" bytes from " + label_names_[index] +
": the data file is damaged or content is incomplete.");
}
TensorShape image_tensor_shape = TensorShape({kQMnistImageRows, kQMnistImageCols, 1});
TensorShape label_tensor_shape = TensorShape({kQMnistLabelLength});
@ -258,23 +268,32 @@ Status QMnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_r
RETURN_UNEXPECTED_IF_NULL(label_reader);
RETURN_UNEXPECTED_IF_NULL(num_labels);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(),
"Invalid file, failed to open " + DatasetName() + " label file: " + file_name);
"Invalid file, failed to open " + file_name + ": the label file is permission denied.");
int64_t label_len = label_reader->seekg(0, std::ios::end).tellg();
(void)label_reader->seekg(0, std::ios::beg);
// The first 12 bytes of the label file are type, number and length
CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 12, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 12,
"Invalid file, load " + file_name +
" failed: the first 12 bytes of the label file should be type, number and length, " +
"but got the first read bytes : " + std::to_string(label_len));
uint32_t magic_number;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number));
CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kQMnistLabelFileMagicNumber,
"Invalid file, this is not the " + DatasetName() + " label file: " + file_name);
"Invalid label file, the number of labels loading from " + file_name + " should be " +
std::to_string(kQMnistLabelFileMagicNumber) + ", but got " +
std::to_string(magic_number) + ".");
uint32_t num_items;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items));
uint32_t length;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &length));
CHECK_FAIL_RETURN_UNEXPECTED(length == kQMnistLabelLength, "Invalid data, length of labels is not equal to 8.");
CHECK_FAIL_RETURN_UNEXPECTED(length == kQMnistLabelLength, "Invalid data, length of every label loading from " +
file_name + " should be equal to 8, but got " +
std::to_string(length) + ".");
CHECK_FAIL_RETURN_UNEXPECTED((label_len - 12) == num_items * kQMnistLabelLength * 4,
"Invalid data, number of labels is wrong.");
"Invalid data, the total bytes of labels loading from Qmnist label file: " + file_name +
" should be " + std::to_string(label_len - 12) + ", but got " +
std::to_string(num_items * kQMnistLabelLength * 4) + ".");
*num_labels = num_items;
return Status::OK();
}

View File

@ -98,7 +98,7 @@ void RandomDataOp::GenerateSchema() {
std::make_unique<ColDescriptor>(col_name, DataType(newType), TensorImpl::kFlexible, rank, new_shape.get());
Status rc = data_schema_->AddColumn(*new_col);
if (rc.IsError()) MS_LOG(ERROR) << "Failed to generate a schema. Message:" << rc;
if (rc.IsError()) MS_LOG(ERROR) << "[Internal ERROR] Failed to generate a schema. Message:" << rc;
}
}
@ -136,7 +136,8 @@ Status RandomDataOp::CreateRandomRow(TensorRow *new_row) {
buf = std::make_unique<unsigned char[]>(size_in_bytes);
int ret_code = memset_s(buf.get(), size_in_bytes, random_byte, size_in_bytes);
if (ret_code != 0) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor.");
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"[Internal ERROR] memset_s failed to set random bytes for a tensor.");
}
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor));

View File

@ -53,7 +53,7 @@ Status DistributedSamplerRT::InitSampler() {
CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "Invalid parameter, num_samples must be greater than 0, but got " +
std::to_string(num_samples_) + ".\n");
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n");
num_rows_ > 0, "[Internal ERROR] num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n");
CHECK_FAIL_RETURN_UNEXPECTED(
device_id_ < num_devices_ && device_id_ >= 0 && num_rows_ > 0 && num_samples_ > 0,
"Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " +
@ -96,7 +96,7 @@ Status DistributedSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (cnt_ > samples_per_tensor_) {
RETURN_STATUS_UNEXPECTED(
"Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" +
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" +
std::to_string(cnt_) + ", samples_per_tensor(num_samples): " + std::to_string(samples_per_tensor_));
} else if (cnt_ == samples_per_tensor_ && (non_empty_ || !even_dist_)) {
(*out) = TensorRow(TensorRow::kFlagEOE);

View File

@ -29,7 +29,7 @@ Status MindRecordSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (next_id_ > num_samples_) {
RETURN_STATUS_UNEXPECTED(
"Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
std::to_string(next_id_) + ", num_samples_: " + std::to_string(num_samples_));
} else if (next_id_ == num_samples_) {
(*out) = TensorRow(TensorRow::kFlagEOE);
@ -53,8 +53,8 @@ Status MindRecordSamplerRT::InitSampler() {
if (!sample_ids_) {
// Note, sample_ids_.empty() is okay and will just give no sample ids.
RETURN_STATUS_UNEXPECTED(
"Init Sampler failed as sample_ids is empty, here ShardReader did not provide a valid sample ids vector via"
" MindRecordSamplerRT");
"[Internal ERROR]Init Sampler failed as sample_ids is empty, here ShardReader did not provide a valid sample ids "
"vector via MindRecordSamplerRT.");
}
// Usually, the num samples is given from the user interface. In our case, that data is in mindrecord.

View File

@ -44,7 +44,7 @@ Status PKSamplerRT::InitSampler() {
// Compute that here for this case to find the total number of samples that are available to return.
// (in this case, samples per class * total classes).
if (samples_per_class_ > std::numeric_limits<int64_t>::max() / static_cast<int64_t>(labels_.size())) {
RETURN_STATUS_UNEXPECTED("Overflow in counting num_rows");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Overflow in counting num_rows");
}
num_rows_ = samples_per_class_ * static_cast<int64_t>(labels_.size());
@ -72,7 +72,7 @@ Status PKSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (next_id_ > num_samples_ || num_samples_ == 0) {
RETURN_STATUS_UNEXPECTED(
"Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_));
} else if (next_id_ == num_samples_) {
(*out) = TensorRow(TensorRow::kFlagEOE);

View File

@ -39,7 +39,7 @@ Status PythonSamplerRT::GetNextSample(TensorRow *out) {
{
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
py::object py_ret = py_sampler_instance.attr("_get_indices")();
@ -57,7 +57,8 @@ Status PythonSamplerRT::GetNextSample(TensorRow *out) {
return Status(StatusCode::kMDPyFuncException, e.what());
} catch (const py::cast_error &e) {
return Status(StatusCode::kMDPyFuncException,
"Invalid data, python sampler iterator should return an integer index.");
"Invalid data, Python sampler iterator should return an integer index, but error raised: " +
std::string(e.what()));
}
}
(*out) = {sample_ids};
@ -71,7 +72,7 @@ Status PythonSamplerRT::InitSampler() {
return Status::OK();
}
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_));
num_rows_ > 0, "[Internal ERROR] num_rows must be greater than 0, but got " + std::to_string(num_rows_));
// Special value of 0 for num_samples means that the user wants to sample the entire set of data.
// If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly.
if (num_samples_ == 0 || num_samples_ > num_rows_) {
@ -80,12 +81,13 @@ Status PythonSamplerRT::InitSampler() {
{
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
py_sampler_instance.attr("_handshake")(num_rows_, num_samples_);
} catch (const py::error_already_set &e) {
return Status(StatusCode::kMDPyFuncException, e.what());
return Status(StatusCode::kMDPyFuncException,
"[Internal ERROR] python sampler execute _handshake failed: " + std::string(e.what()));
}
}
@ -98,7 +100,7 @@ Status PythonSamplerRT::ResetSampler() {
need_to_reset_ = false;
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
py_sampler_instance.attr("reset")();

View File

@ -36,8 +36,9 @@ RandomSamplerRT::RandomSamplerRT(bool replacement, int64_t num_samples, bool res
Status RandomSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (next_id_ > num_samples_) {
RETURN_STATUS_UNEXPECTED("Sampler index must be less than or equal to num_samples(total rows in dataset), but got" +
std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_));
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got" +
std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_));
} else if (next_id_ == num_samples_) {
(*out) = TensorRow(TensorRow::kFlagEOE);
} else {
@ -81,7 +82,7 @@ Status RandomSamplerRT::InitSampler() {
}
CHECK_FAIL_RETURN_UNEXPECTED(
num_samples_ > 0 && num_rows_ > 0,
"Invalid parameter, num_samples and num_rows must be greater than 0, but got num_samples: " +
"[Internal ERROR] num_samples and num_rows must be greater than 0, but got num_samples: " +
std::to_string(num_samples_) + ", num_rows: " + std::to_string(num_rows_));
samples_per_tensor_ = samples_per_tensor_ > num_samples_ ? num_samples_ : samples_per_tensor_;
rnd_.seed(seed_);

View File

@ -28,7 +28,7 @@ Status RandomAccessOp::GetNumRowsInDataset(int64_t *num) const {
// Here, it is just a getter method to return the value. However, it is invalid if there is
// not a value set for this count, so generate a failure if that is the case.
if (num == nullptr || num_rows_ == -1) {
RETURN_STATUS_UNEXPECTED("Get num rows in Dataset failed, num_rows has not been set yet.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Get num rows in Dataset failed, num_rows has not been set yet.");
}
(*num) = num_rows_;
return Status::OK();
@ -55,7 +55,7 @@ Status SamplerRT::HandshakeRandomAccessOp(const RandomAccessOp *op) {
RETURN_IF_NOT_OK(child_sampler->HandshakeRandomAccessOp(op));
}
CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "RandomAccessOp init failed, as it is nullptr.");
CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "[Internal ERROR] RandomAccessOp init failed, as it is nullptr.");
// If there's a child sampler, set the row count to be it's sample count
if (HasChildSampler()) {
@ -114,7 +114,7 @@ Status SamplerRT::GetAllIdsThenReset(py::array *data) {
{
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
RETURN_IF_NOT_OK(sample_ids->GetDataAsNumpy(data));
@ -127,7 +127,9 @@ Status SamplerRT::GetAllIdsThenReset(py::array *data) {
#endif
Status SamplerRT::SetNumSamples(int64_t num_samples) {
CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "Invalid parameter, num_samples must be greater than or equal to 0.");
CHECK_FAIL_RETURN_UNEXPECTED(
num_samples >= 0,
"Invalid parameter, 'num_samples' must be greater than or equal to 0, but got " + std::to_string(num_samples));
num_samples_ = num_samples;
return Status::OK();
}
@ -161,13 +163,13 @@ Status SamplerRT::AddChild(std::shared_ptr<SamplerRT> child) {
// Only samplers can be added, not any other DatasetOp.
std::shared_ptr<SamplerRT> sampler = std::dynamic_pointer_cast<SamplerRT>(child);
if (!sampler) {
std::string err_msg("Cannot add child, child is not a sampler object.");
std::string err_msg("[Internal ERROR] Cannot add child, child is not a sampler object.");
RETURN_STATUS_UNEXPECTED(err_msg);
}
// Samplers can have at most 1 child.
if (!child_.empty()) {
std::string err_msg("Cannot add child sampler, this sampler already has a child.");
std::string err_msg("[Internal ERROR] Cannot add child sampler, this sampler already has a child.");
RETURN_STATUS_UNEXPECTED(err_msg);
}

View File

@ -42,7 +42,7 @@ class RandomAccessOp {
// @param std::map<int64_t, std::vector<int64_t>> * map
// @return Status The status code returned
virtual Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *map) const {
RETURN_STATUS_UNEXPECTED("GetClassIds needs to be override to support PK");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] GetClassIds needs to be override to support PK.");
}
// default destructor

View File

@ -29,7 +29,7 @@ Status SequentialSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (id_count_ > num_samples_) {
RETURN_STATUS_UNEXPECTED(
"Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" +
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" +
std::to_string(id_count_) + ", num_samples_: " + std::to_string(num_samples_));
} else if (id_count_ == num_samples_) {
(*out) = TensorRow(TensorRow::kFlagEOE);

View File

@ -41,10 +41,9 @@ Status WeightedRandomSamplerRT::InitSampler() {
if (num_samples_ == 0 || num_samples_ > num_rows_) {
num_samples_ = num_rows_;
}
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0 && num_samples_,
"Invalid parameter, num_samples and num_rows must be greater than 0, but got num_rows: " +
std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_));
CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0 && num_samples_,
"[Internal ERROR] num_samples and num_rows must be greater than 0, but got num_rows: " +
std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_));
CHECK_FAIL_RETURN_UNEXPECTED(samples_per_tensor_ > 0,
"Invalid parameter, samples_per_tensor(num_samples) must be greater than 0, but got " +
std::to_string(samples_per_tensor_) + ".\n");
@ -160,8 +159,9 @@ Status WeightedRandomSamplerRT::GetNextSample(TensorRow *out) {
}
if (genId >= num_rows_) {
RETURN_STATUS_UNEXPECTED("Generated indice is out of bound, expect range [0, num_data-1], got indice: " +
std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1));
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Generated indice is out of bound, expect range [0, num_data-1], got indice: " +
std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1));
}
if (HasChildSampler()) {

View File

@ -76,7 +76,8 @@ Status SBUOp::ReadImageToTensor(const std::string &path, std::shared_ptr<Tensor>
if (decode_ == true) {
Status rc = Decode(*tensor, tensor);
if (rc.IsError()) {
RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path);
RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode image:" + path +
", the image is damaged or permission denied.");
}
}
return Status::OK();
@ -129,18 +130,21 @@ Status SBUOp::PrepareData() {
Path root_dir(real_folder_path.value());
url_path_ = root_dir / url_file_name;
CHECK_FAIL_RETURN_UNEXPECTED(url_path_.Exists() && !url_path_.IsDirectory(),
"Invalid file, failed to find SBU url file: " + url_path_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
url_path_.Exists() && !url_path_.IsDirectory(),
"Invalid file, SBU url file: " + url_path_.ToString() + " does not exist or is a directory.");
MS_LOG(INFO) << "SBU operator found url file " << url_path_.ToString() << ".";
caption_path_ = root_dir / caption_file_name;
CHECK_FAIL_RETURN_UNEXPECTED(caption_path_.Exists() && !caption_path_.IsDirectory(),
"Invalid file, failed to find SBU caption file: " + caption_path_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
caption_path_.Exists() && !caption_path_.IsDirectory(),
"Invalid file, SBU caption file: " + caption_path_.ToString() + " does not exist or is a directory.");
MS_LOG(INFO) << "SBU operator found caption file " << caption_path_.ToString() << ".";
image_folder_ = root_dir / image_folder_name;
CHECK_FAIL_RETURN_UNEXPECTED(image_folder_.Exists() && image_folder_.IsDirectory(),
"Invalid folder, failed to find SBU image folder: " + image_folder_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
image_folder_.Exists() && image_folder_.IsDirectory(),
"Invalid folder, SBU image folder:" + image_folder_.ToString() + " does not exist or is not a directory.");
MS_LOG(INFO) << "SBU operator found image folder " << image_folder_.ToString() << ".";
std::ifstream url_file_reader;
@ -149,10 +153,11 @@ Status SBUOp::PrepareData() {
url_file_reader.open(url_path_.ToString(), std::ios::in);
caption_file_reader.open(caption_path_.ToString(), std::ios::in);
CHECK_FAIL_RETURN_UNEXPECTED(url_file_reader.is_open(),
"Invalid file, failed to open SBU url file: " + url_path_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(caption_file_reader.is_open(),
"Invalid file, failed to open SBU caption file: " + caption_path_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(url_file_reader.is_open(), "Invalid file, failed to open " + url_path_.ToString() +
": the SBU url file is permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
caption_file_reader.is_open(),
"Invalid file, failed to open " + caption_path_.ToString() + ": the SBU caption file is permission denied.");
Status rc = GetAvailablePairs(url_file_reader, caption_file_reader);
url_file_reader.close();
@ -172,8 +177,8 @@ Status SBUOp::GetAvailablePairs(std::ifstream &url_file_reader, std::ifstream &c
while (std::getline(url_file_reader, url_line) && std::getline(caption_file_reader, caption_line)) {
CHECK_FAIL_RETURN_UNEXPECTED(
(url_line.empty() && caption_line.empty()) || (!url_line.empty() && !caption_line.empty()),
"Invalid data, SBU url and caption file are mismatched: " + url_path_.ToString() + " and " +
caption_path_.ToString());
"Invalid data, SBU url: " + url_path_.ToString() + " and caption file: " + caption_path_.ToString() +
" load empty data at line: " + std::to_string(line_num) + ".");
if (!url_line.empty() && !caption_line.empty()) {
line_num++;
RETURN_IF_NOT_OK(this->ParsePair(url_line, caption_line));
@ -182,7 +187,8 @@ Status SBUOp::GetAvailablePairs(std::ifstream &url_file_reader, std::ifstream &c
image_caption_pairs_.shrink_to_fit();
CHECK_FAIL_RETURN_UNEXPECTED(image_caption_pairs_.size() > 0, "No valid images in " + image_folder_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(image_caption_pairs_.size() > 0,
"Invalid data, no valid images in " + image_folder_.ToString() + ", check SBU dataset.");
// base field of RandomAccessOp
num_rows_ = image_caption_pairs_.size();

View File

@ -80,13 +80,14 @@ Status TextFileOp::LoadTensor(const std::string &line, TensorRow *out_row) {
Status TextFileOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " get real path failed, path=" << file;
RETURN_STATUS_UNEXPECTED("Invalid file, " + DatasetName() + " get real path failed, path=" + file);
MS_LOG(ERROR) << "Invalid file path, " << file << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file + " does not exist.");
}
std::ifstream handle(realpath.value());
if (!handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + file);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open text:" + file +
", the file is damaged or permission denied.");
}
int64_t rows_total = 0;
@ -170,13 +171,13 @@ Status TextFileOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) {
int64_t CountTotalRows(const std::string &file) {
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file;
MS_LOG(ERROR) << "Invalid file, " << file << " does not exist.";
return 0;
}
std::ifstream handle(realpath.value());
if (!handle.is_open()) {
MS_LOG(ERROR) << "Invalid file, failed to open file: " << file;
MS_LOG(ERROR) << "Invalid file, failed to open text file:" << file << ", the file is damaged or permission denied.";
return 0;
}

View File

@ -44,7 +44,7 @@ const int64_t kTFRecordFileLimit = 0x140000000;
bool TFReaderOp::ValidateFirstRowCrc(const std::string &filename) {
auto realpath = FileUtils::GetRealPath(filename.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filename;
MS_LOG(ERROR) << "Invalid file path, " << filename << " does not exist.";
return false;
}
@ -126,7 +126,7 @@ Status TFReaderOp::Init() {
}
if (total_rows_ < 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid parameter, num_samples or num_rows for TFRecordDataset must be greater than 0, but got: " +
"[Internal ERROR] num_samples or num_rows for TFRecordDataset must be greater than 0, but got: " +
std::to_string(total_rows_));
}
@ -267,14 +267,14 @@ Status TFReaderOp::FillIOBlockNoShuffle() {
Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
auto realpath = FileUtils::GetRealPath(filename.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filename;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + filename);
MS_LOG(ERROR) << "Invalid file path, " << filename << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + filename + " does not exist.");
}
std::ifstream reader;
reader.open(realpath.value());
if (!reader) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + filename);
RETURN_STATUS_UNEXPECTED("Invalid file, " + filename + " open failed: permission denied!");
}
int64_t rows_read = 0;
@ -304,7 +304,7 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i
if (start_offset == kInvalidOffset || (rows_total >= start_offset && rows_total < end_offset)) {
dataengine::Example tf_file;
if (!tf_file.ParseFromString(serialized_example)) {
std::string errMsg = "Invalid file, failed to parse tfrecord file : " + filename;
std::string errMsg = "Failed to parse tfrecord file: " + filename + ", make sure protobuf version is suitable.";
MS_LOG(DEBUG) << errMsg + ", details of string: " << serialized_example;
RETURN_STATUS_UNEXPECTED(errMsg);
}
@ -333,7 +333,8 @@ Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *ou
const google::protobuf::Map<std::string, dataengine::Feature> &feature_map = example_features.feature();
auto iter_column = feature_map.find(current_col.Name());
if (iter_column == feature_map.end()) {
RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist.");
RETURN_STATUS_UNEXPECTED("Invalid columns_list, column name: " + current_col.Name() +
" does not exist in tfrecord file, check tfrecord files.");
}
const dataengine::Feature &column_values_list = iter_column->second;
RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col));
@ -383,11 +384,13 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature
break;
}
case dataengine::Feature::KindCase::KIND_NOT_SET: {
std::string err_msg = "Invalid data, column type in tf record file must be uint8, int64 or float32.";
std::string err_msg =
"Unrecognized datatype, column type in tfrecord file must be uint8, int64 or float32, check tfrecord file.";
RETURN_STATUS_UNEXPECTED(err_msg);
}
default: {
std::string err_msg = "Invalid data, column type in tf record file must be uint8, int64 or float32.";
std::string err_msg =
"Unrecognized datatype, column type in tfrecord file must be uint8, int64 or float32, check tfrecord file.";
RETURN_STATUS_UNEXPECTED(err_msg);
}
}
@ -404,8 +407,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
// Must be single byte type for each element!
if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 &&
current_col.Type() != DataType::DE_STRING) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be int8, uint8 or string, but got " + current_col.Type().ToString();
std::string err_msg = "Invalid column type, the column type of " + current_col.Name() +
" should be int8, uint8 or string, but got " + current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -439,7 +442,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
for (int i = 1; i < cur_shape.Size(); ++i) {
if (cur_shape[i] == TensorShape::kDimUnknown) {
std::string err_msg =
"Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name();
"Invalid data dimension, only one dimension shape supported is -1, but the 0th and the" +
std::to_string(i) + "th dimension shape of " + current_col.Name() + " are both -1.";
RETURN_STATUS_UNEXPECTED(err_msg);
}
new_pad_size *= cur_shape[i];
@ -447,10 +451,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
pad_size = new_pad_size;
} else {
if (cur_shape.known() && cur_shape.NumOfElements() != max_size) {
std::string err_msg = "Invalid data, shape in schema's column '" + current_col.Name() + "' is incorrect." +
"\nshape received: " + cur_shape.ToString() +
"\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) +
"\nexpected total elements in shape: " + std::to_string(max_size);
std::string err_msg = "Data dimensions of '" + current_col.Name() +
"' do not match, the expected total elements of shape " + cur_shape.ToString() +
" should be " + std::to_string(max_size) + ", but got " +
std::to_string(cur_shape.NumOfElements());
RETURN_STATUS_UNEXPECTED(err_msg);
}
}
@ -469,8 +473,8 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor &current_col, const dataeng
// KFloatList can only map to DE types:
// DE_FLOAT32
if (current_col.Type() != DataType::DE_FLOAT32) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be string, but got " + current_col.Type().ToString();
std::string err_msg = "Invalid column type, the column type of " + current_col.Name() +
" should be string, but got " + current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -507,9 +511,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor &current_col, const dat
} else if (current_col.Type() == DataType::DE_INT8) {
RETURN_IF_NOT_OK(LoadIntList<int8_t>(current_col, column_values_list, num_elements, tensor));
} else {
std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.Name() +
", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" +
", but got " + current_col.Type().ToString();
std::string err_msg = "Invalid column type, the column type of " + current_col.Name() +
" should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8, but got " +
current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -522,8 +526,8 @@ template <typename T>
Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list,
int32_t *num_elements, std::shared_ptr<Tensor> *tensor) {
if (!(current_col.Type().IsInt())) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be int, but got " + current_col.Type().ToString();
std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + " should be int, but got " +
current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -551,8 +555,8 @@ Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengin
Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::string> columns_to_load) {
auto realpath = FileUtils::GetRealPath(tf_file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << tf_file;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + tf_file);
MS_LOG(ERROR) << "Invalid file path, " << tf_file << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + tf_file + " does not exist.");
}
std::ifstream reader;
@ -572,7 +576,8 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri
dataengine::Example example;
if (!example.ParseFromString(serialized_example)) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse tfrecord file: " + serialized_example);
RETURN_STATUS_UNEXPECTED("Failed to parse tfrecord file: " + realpath.value() +
", fields that failed to parse: " + serialized_example);
}
const dataengine::Features &example_features = example.features();
@ -587,7 +592,7 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri
for (const auto &curr_col_name : columns_to_load) {
auto it = feature_map.find(curr_col_name);
if (it == feature_map.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, failed to find column name: " + curr_col_name);
RETURN_STATUS_UNEXPECTED("Invalid columns_list, tfrecord file failed to find column name: " + curr_col_name);
}
std::string column_name = it->first;
@ -609,10 +614,12 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri
break;
case dataengine::Feature::KindCase::KIND_NOT_SET:
RETURN_STATUS_UNEXPECTED("Invalid data, column type of tf record file must be uint8, int64 or float32.");
RETURN_STATUS_UNEXPECTED("Unrecognized column type, the column type of " + column_name +
" should be uint8, int64 or float32, but got unrecognized column type.");
default:
RETURN_STATUS_UNEXPECTED("Invalid data, column type of tf record file must be uint8, int64 or float32.");
RETURN_STATUS_UNEXPECTED("Unsupported column type, the column type of " + column_name +
" should be uint8, int64 or float32, but got unsupported column type.");
}
RETURN_IF_NOT_OK(
@ -633,7 +640,9 @@ Status TFReaderOp::CountTotalRows(int64_t *out_total_rows, const std::vector<std
std::vector<std::future<int64_t>> async_results;
if (threads <= 0) {
RETURN_STATUS_UNEXPECTED("Invalid data, the threads of TFReader should be greater than zero, but got zero.");
RETURN_STATUS_UNEXPECTED(
"Invalid threads number, the threads number of TFReader should be greater than zero, but got " +
std::to_string(threads) + ".");
}
int64_t chunk_size = filenames.size() / threads;
int64_t remainder = filenames.size() % threads;
@ -672,7 +681,7 @@ Status TFReaderOp::CountTotalRows(int64_t *out_total_rows, const std::vector<std
*out_total_rows = total_rows;
} catch (const std::exception &e) {
std::string err_msg = "Unexpected error occurred: ";
err_msg += e.what();
err_msg += std::string(e.what());
RETURN_STATUS_UNEXPECTED(err_msg);
}
@ -684,7 +693,7 @@ int64_t TFReaderOp::CountTotalRowsSectioned(const std::vector<std::string> &file
for (int i = begin; i < end; i++) {
auto realpath = FileUtils::GetRealPath(filenames[i].data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filenames[i];
MS_LOG(ERROR) << "Invalid file path, " << filenames[i] << " does not exist.";
continue;
}

View File

@ -107,7 +107,7 @@ int64_t USPSOp::CountRows(const std::string &data_file) {
std::ifstream data_file_reader;
data_file_reader.open(data_file, std::ios::in);
if (!data_file_reader.is_open()) {
MS_LOG(ERROR) << "Invalid file, failed to open file: " << data_file;
MS_LOG(ERROR) << "Invalid file, failed to open " << data_file << ": the file is permission denied.";
return 0;
}
@ -124,7 +124,8 @@ int64_t USPSOp::CountRows(const std::string &data_file) {
Status USPSOp::GetFiles() {
auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data());
CHECK_FAIL_RETURN_UNEXPECTED(real_dataset_dir.has_value(), "Get real path failed: " + dataset_dir_);
CHECK_FAIL_RETURN_UNEXPECTED(real_dataset_dir.has_value(),
"Invalid file path, USPS dataset dir: " + dataset_dir_ + " does not exist.");
Path root_dir(real_dataset_dir.value());
const Path train_file_name("usps");
@ -144,16 +145,18 @@ Status USPSOp::GetFiles() {
if (use_train) {
Path train_path = root_dir / train_file_name;
CHECK_FAIL_RETURN_UNEXPECTED(train_path.Exists() && !train_path.IsDirectory(),
"Invalid file, failed to find USPS train data file: " + train_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
train_path.Exists() && !train_path.IsDirectory(),
"Invalid file, USPS dataset train file: " + train_path.ToString() + " does not exist or is a directory.");
data_files_list_.emplace_back(train_path.ToString());
MS_LOG(INFO) << "USPS operator found train data file " << train_path.ToString() << ".";
}
if (use_test) {
Path test_path = root_dir / test_file_name;
CHECK_FAIL_RETURN_UNEXPECTED(test_path.Exists() && !test_path.IsDirectory(),
"Invalid file, failed to find USPS test data file: " + test_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
test_path.Exists() && !test_path.IsDirectory(),
"Invalid file, USPS dataset test file: " + test_path.ToString() + " does not exist or is a directory.");
data_files_list_.emplace_back(test_path.ToString());
MS_LOG(INFO) << "USPS operator found test data file " << test_path.ToString() << ".";
}
@ -163,7 +166,8 @@ Status USPSOp::GetFiles() {
Status USPSOp::LoadFile(const std::string &data_file, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
std::ifstream data_file_reader(data_file);
if (!data_file_reader.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + data_file);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open USPS dataset file: " + data_file +
", the file is permission denied.");
}
int64_t rows_total = 0;
@ -210,8 +214,8 @@ Status USPSOp::LoadTensor(std::string *line, TensorRow *trow) {
auto images_buffer = std::make_unique<unsigned char[]>(kUSPSImageSize);
auto labels_buffer = std::make_unique<uint32_t[]>(1);
if (images_buffer == nullptr || labels_buffer == nullptr) {
MS_LOG(ERROR) << "Failed to allocate memory for USPS buffer.";
RETURN_STATUS_UNEXPECTED("Failed to allocate memory for USPS buffer.");
MS_LOG(ERROR) << "[Internal ERROR] Failed to allocate memory for USPS buffer.";
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to allocate memory for USPS buffer.");
}
RETURN_IF_NOT_OK(this->ParseLine(line, images_buffer, labels_buffer));
@ -245,10 +249,12 @@ Status USPSOp::ParseLine(std::string *line, const std::unique_ptr<unsigned char[
} else {
size_t split_pos = item.find(":");
CHECK_FAIL_RETURN_UNEXPECTED(split_pos != std::string::npos, "Invalid data, USPS data file is corrupted.");
CHECK_FAIL_RETURN_UNEXPECTED(split_pos != std::string::npos,
"Invalid data, split character ':' is missing in USPS data file.");
// check pixel index
CHECK_FAIL_RETURN_UNEXPECTED(std::stoi(item.substr(0, split_pos)) == (split_num - 1),
"Invalid data, USPS data file is corrupted.");
"Invalid data, the character before ':' should be " + std::to_string(split_num - 1) +
", but got " + item.substr(0, split_pos) + ".");
std::string pixel_str = item.substr(split_pos + 1, item.length() - split_pos);
// transform the real pixel value from [-1, 1] to the integers within [0, 255]
@ -257,7 +263,10 @@ Status USPSOp::ParseLine(std::string *line, const std::unique_ptr<unsigned char[
line->erase(0, pos + 1);
}
CHECK_FAIL_RETURN_UNEXPECTED(split_num == (kUSPSImageSize + 1), "Invalid data, USPS data file is corrupted.");
CHECK_FAIL_RETURN_UNEXPECTED(split_num == (kUSPSImageSize + 1),
"Invalid data, the number of split characters ':' in USPS data file is corrupted, "
"should be " +
std::to_string(kUSPSImageSize + 1) + ", but got " + std::to_string(split_num) + ".");
return Status::OK();
}
@ -274,7 +283,7 @@ Status USPSOp::CalculateNumRowsPerShard() {
}
std::string file_list = ss.str();
RETURN_STATUS_UNEXPECTED(
"Invalid data, USPSDataset API can't read the data file (interface mismatch or no data found). "
"Invalid data, 'USPSDataset' API can't read the data file (interface mismatch or no data found). "
"Check file: " +
file_list);
}

View File

@ -118,14 +118,15 @@ Status VOCOp::ParseImageIds() {
auto realpath = FileUtils::GetRealPath(image_sets_file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << image_sets_file;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + image_sets_file);
MS_LOG(ERROR) << "Invalid file path, " << image_sets_file << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + image_sets_file + " does not exist.");
}
std::ifstream in_file;
in_file.open(realpath.value());
if (in_file.fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + image_sets_file);
RETURN_STATUS_UNEXPECTED("Invalid ImageSets file, failed to open ImageSets file: " + image_sets_file +
", the file is damaged or permission denied.");
}
std::string id;
while (getline(in_file, id)) {
@ -187,28 +188,30 @@ Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float
std::string invalid_bbox = "{" + std::to_string(static_cast<int>(xmin)) + ", " +
std::to_string(static_cast<int>(ymin)) + ", " + std::to_string(static_cast<int>(xmax)) +
", " + std::to_string(static_cast<int>(ymax)) + "}";
RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path);
RETURN_STATUS_UNEXPECTED("Invalid bndbox, the coordinate of bndbox in " + path +
" should be greater than 0, but got " + invalid_bbox);
}
return Status::OK();
}
Status VOCOp::ParseAnnotationBbox(const std::string &path) {
if (!Path(path).Exists()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path);
RETURN_STATUS_UNEXPECTED("Invalid file path, " + path + " does not exist.");
}
Annotation annotation;
XMLDocument doc;
XMLError e = doc.LoadFile(common::SafeCStr(path));
if (e != XMLError::XML_SUCCESS) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to load xml file: " + path);
RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load " + path + ": the xml file is damaged or incorrect format.");
}
XMLElement *root = doc.RootElement();
if (root == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid data, failed to load root element for xml file.");
RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load root element of " + path +
": the format of xml file is incorrect.");
}
XMLElement *object = root->FirstChildElement("object");
if (object == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid data, no object found in " + path);
RETURN_STATUS_UNEXPECTED("Invalid xml, the node of object is missing in " + path + ".");
}
while (object != nullptr) {
std::string label_name;
@ -226,7 +229,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) {
ParseNodeValue(bbox_node, "ymax", &ymax);
RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path);
RETURN_STATUS_UNEXPECTED("Invalid xml, the node of bndbox is missing in " + path);
}
if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 &&
@ -254,7 +257,8 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co
if (decode_ == true) {
Status rc = Decode(*tensor, tensor);
if (rc.IsError()) {
RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path);
RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode " + path +
": the image is damaged or permission denied.");
}
}
return Status::OK();
@ -280,7 +284,7 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
}
CHECK_FAIL_RETURN_UNEXPECTED(
item.second.size() == 6,
"Invalid parameter, annotation only support 6 parameters, but got " + std::to_string(item.second.size()));
"[Internal ERROR] annotation only support 6 parameters, but got " + std::to_string(item.second.size()));
std::vector<float> tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]};
bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end());
@ -328,8 +332,8 @@ Status VOCOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<in
RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
if ((*output_class_indexing).empty()) {
if (task_type_ != TaskType::Detection) {
MS_LOG(ERROR) << "Invalid parameter, GetClassIndexing only valid in \"Detection\" task.";
RETURN_STATUS_UNEXPECTED("Invalid parameter, GetClassIndexing only valid in \"Detection\" task.");
MS_LOG(ERROR) << "Invalid task, only 'Detection' task support GetClassIndexing.";
RETURN_STATUS_UNEXPECTED("Invalid task, only 'Detection' task support GetClassIndexing.");
}
RETURN_IF_NOT_OK(ParseImageIds());
RETURN_IF_NOT_OK(ParseAnnotationIds());

View File

@ -49,12 +49,12 @@ YesNoOp::YesNoOp(const std::string &file_dir, int32_t num_workers, int32_t queue
Status YesNoOp::PrepareData() {
auto realpath = FileUtils::GetRealPath(dataset_dir_.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
MS_LOG(ERROR) << "Invalid file path, " << dataset_dir_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + dataset_dir_ + " does not exist.");
}
Path dir(realpath.value());
if (dir.Exists() == false || dir.IsDirectory() == false) {
RETURN_STATUS_UNEXPECTED("Invalid parameter, failed to open speech commands: " + dataset_dir_);
RETURN_STATUS_UNEXPECTED("Invalid directory, " + dataset_dir_ + " does not exist or is not a directory.");
}
std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir);
RETURN_UNEXPECTED_IF_NULL(dir_itr);
@ -101,8 +101,9 @@ Status YesNoOp::Split(const std::string &line, std::vector<int32_t> *split_num)
split_num->emplace_back(stoi(split[i]));
}
} catch (const std::exception &e) {
MS_LOG(ERROR) << "Converting char to int confront with an error in function stoi().";
RETURN_STATUS_UNEXPECTED("Converting char to int confront with an error in function stoi().");
MS_LOG(ERROR) << "[Internal ERROR] Converting char to int confront with an error in function stoi: " << e.what();
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Converting char to int confront with an error in function stoi: " +
std::string(e.what()));
}
return Status::OK();
}

View File

@ -38,7 +38,7 @@ void TakeOp::Print(std::ostream &out, bool show_all) const {
}
}
Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] TakeOp is an inlined operator."); }
Status TakeOp::GetNextRow(TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);

View File

@ -101,9 +101,7 @@ Status ZipOp::ComputeColMap() {
int32_t old_id = pair.second;
// check if name already exists in column name descriptor
if (column_name_id_map_.count(name) == 1) {
RETURN_STATUS_UNEXPECTED("Invalid parameter, key: " + name +
" already exists when zipping datasets. Check for duplicate key names in different "
"dataset.");
RETURN_STATUS_UNEXPECTED("Invalid data, duplicate column " + name + " already exists when zipping datasets.");
}
column_name_id_map_[name] = old_id + colsCurrent;
}
@ -115,7 +113,7 @@ Status ZipOp::ComputeColMap() {
return Status::OK();
}
Status ZipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
Status ZipOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ZipOp is an inlined operator."); }
Status ZipOp::GetNextRow(TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);

View File

@ -210,7 +210,7 @@ def test_cifar10_exception():
with pytest.raises(ValueError, match=error_msg_6):
ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=256)
error_msg_7 = "no .bin files found"
error_msg_7 = r"cifar\(.bin\) files are missing"
with pytest.raises(RuntimeError, match=error_msg_7):
ds1 = ds.Cifar10Dataset(NO_BIN_DIR)
for _ in ds1.__iter__():
@ -360,7 +360,7 @@ def test_cifar100_exception():
with pytest.raises(ValueError, match=error_msg_6):
ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=256)
error_msg_7 = "no .bin files found"
error_msg_7 = r"cifar\(.bin\) files are missing"
with pytest.raises(RuntimeError, match=error_msg_7):
ds1 = ds.Cifar100Dataset(NO_BIN_DIR)
for _ in ds1.__iter__():

View File

@ -300,7 +300,7 @@ def test_coco_case_exception():
pass
assert False
except RuntimeError as e:
assert "required node not found in JSON" in str(e)
assert "the attribute of 'images' is missing" in str(e)
try:
data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_CATEGORY_ID_FILE, task="Detection")
@ -308,7 +308,7 @@ def test_coco_case_exception():
pass
assert False
except RuntimeError as e:
assert "category_id can't find in categories" in str(e)
assert "the attribute of 'category_id': 7 is missing" in str(e)
try:
data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_FILE, task="Detection")
@ -316,7 +316,7 @@ def test_coco_case_exception():
pass
assert False
except RuntimeError as e:
assert "failed to open JSON file" in str(e)
assert "Invalid annotation file, Coco Dataset annotation file:" in str(e)
try:
sampler = ds.PKSampler(3)

View File

@ -239,7 +239,7 @@ def test_csv_dataset_exception():
with pytest.raises(Exception) as err:
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
pass
assert "failed to parse file" in str(err.value)
assert "failed to parse" in str(err.value)
TEST_FILE1 = '../data/dataset/testCSV/quoted.csv'
def exception_func(item):

View File

@ -359,7 +359,6 @@ def test_emnist_exception():
with pytest.raises(RuntimeError, match=error_msg_8):
data = ds.EMnistDataset(DATA_DIR, "mnist", "train")
data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
for _ in data.__iter__():
pass
with pytest.raises(RuntimeError, match=error_msg_8):

View File

@ -638,7 +638,7 @@ def test_generator_error_2():
for _ in data1:
pass
print("========", str(info.value))
assert "Generator should return a tuple of NumPy arrays" in str(info.value)
assert "'GeneratorDataset' should return a tuple of NumPy arrays" in str(info.value)
def test_generator_error_3():
@ -663,7 +663,8 @@ def test_generator_error_4():
for _ in data1:
pass
assert "Unexpected error. Result of a tensorOp doesn't match output column names" in str(info.value)
assert "the number of columns returned in 'map' operations should match the number of 'output_columns'"\
in str(info.value)
def test_generator_sequential_sampler():

View File

@ -167,7 +167,7 @@ def test_manifest_dataset_exception():
pass
assert False
except RuntimeError as e:
assert "Invalid data, 'source' is not found in Manifest file" in str(e)
assert "Invalid manifest file, 'source' is missing in" in str(e)
NO_USAGE_DATA_FILE = "../data/dataset/testManifestData/invalidNoUsage.manifest"
try:
@ -176,7 +176,7 @@ def test_manifest_dataset_exception():
pass
assert False
except RuntimeError as e:
assert "Invalid data, 'usage' is not found in Manifest file" in str(e)
assert "Invalid manifest file, 'usage' is missing in" in str(e)
if __name__ == '__main__':

View File

@ -307,7 +307,7 @@ def test_tf_wrong_schema():
pass
except RuntimeError as e:
exception_occurred = True
assert "shape in schema's column 'image' is incorrect" in str(e)
assert "Data dimensions of 'image' do not match" in str(e)
assert exception_occurred, "test_tf_wrong_schema failed."
@ -318,7 +318,7 @@ def test_tfrecord_invalid_columns():
data = ds.TFRecordDataset(FILES, columns_list=invalid_columns_list)
with pytest.raises(RuntimeError) as info:
_ = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__()
assert "Invalid data, failed to find column name: not_exist" in str(info.value)
assert "Invalid columns_list, tfrecord file failed to find column name: not_exist" in str(info.value)
def test_tfrecord_exception():

View File

@ -214,12 +214,12 @@ def test_usps_exception():
for _ in test_data.__iter__():
pass
error_msg_9 = "failed to find USPS train data file"
error_msg_9 = "usps does not exist or is a directory"
with pytest.raises(RuntimeError, match=error_msg_9):
train_data = ds.USPSDataset(WRONG_DIR, "train")
for _ in train_data.__iter__():
pass
error_msg_10 = "failed to find USPS test data file"
error_msg_10 = "usps.t does not exist or is a directory"
with pytest.raises(RuntimeError, match=error_msg_10):
test_data = ds.USPSDataset(WRONG_DIR, "test")
for _ in test_data.__iter__():

View File

@ -240,7 +240,7 @@ def test_voc_exception():
pass
assert False
except RuntimeError as e:
assert "Invalid bndbox: {321, 121, 421, 120}" in str(e)
assert "should be greater than 0, but got {321, 121, 421, 120}" in str(e)
def exception_func(item):
raise Exception("Error occur!")

View File

@ -68,6 +68,9 @@ def test_auto_offload():
dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)):
np.testing.assert_array_equal(img_0, img_1)
# Need to turn off here or subsequent test cases will fail.
ds.config.set_auto_offload(False)
def test_offload_concat_dataset_1():
"""

View File

@ -369,9 +369,12 @@ def test_multi_col_map():
# test exceptions
assert "output_columns with value 233 is not of type" in batch_map_config(2, 2, split_col, ["col2"], 233)
assert "column_order with value 233 is not of type" in batch_map_config(2, 2, split_col, ["col2"], ["col1"], 233)
assert "output_columns in batch is not set correctly" in batch_map_config(2, 2, split_col, ["col2"], ["col1"])
assert "Incorrect number of columns" in batch_map_config(2, 2, split_col, ["col2"], ["col3", "col4", "col5"])
assert "col-1 doesn't exist" in batch_map_config(2, 2, split_col, ["col-1"], ["col_x", "col_y"])
assert "columns that are not involved in 'per_batch_map' should not be in output_columns"\
in batch_map_config(2, 2, split_col, ["col2"], ["col1"])
assert "the number of columns returned in 'per_batch_map' function should be 3"\
in batch_map_config(2, 2, split_col, ["col2"], ["col3", "col4", "col5"])
assert "'col-1' of 'input_columns' doesn't exist"\
in batch_map_config(2, 2, split_col, ["col-1"], ["col_x", "col_y"])
def test_exceptions_2():
@ -379,16 +382,16 @@ def test_exceptions_2():
for i in range(num):
yield (np.array([i]),)
def simple_copy(colList, batchInfo):
return ([np.copy(arr) for arr in colList],)
def simple_copy(col_list, batch_info):
return ([np.copy(arr) for arr in col_list],)
def concat_copy(colList, batchInfo):
def concat_copy(col_list, batch_info):
# this will duplicate the number of rows returned, which would be wrong!
return ([np.copy(arr) for arr in colList] * 2,)
return ([np.copy(arr) for arr in col_list] * 2,)
def shrink_copy(colList, batchInfo):
def shrink_copy(col_list, batch_info):
# this will duplicate the number of rows returned, which would be wrong!
return ([np.copy(arr) for arr in colList][0:int(len(colList) / 2)],)
return ([np.copy(arr) for arr in col_list][0:int(len(col_list) / 2)],)
def test_exceptions_config(gen_num, batch_size, in_cols, per_batch_map):
data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).batch(batch_size, input_columns=in_cols,
@ -401,9 +404,9 @@ def test_exceptions_2():
return str(e)
# test exception where column name is incorrect
assert "col:num1 doesn't exist" in test_exceptions_config(4, 2, ["num1"], simple_copy)
assert "expects: 2 rows returned from per_batch_map, got: 4" in test_exceptions_config(4, 2, ["num"], concat_copy)
assert "expects: 4 rows returned from per_batch_map, got: 2" in test_exceptions_config(4, 4, ["num"], shrink_copy)
assert "'num1' of 'input_columns' doesn't exist" in test_exceptions_config(4, 2, ["num1"], simple_copy)
assert "expects: 2 rows returned from 'per_batch_map', got: 4" in test_exceptions_config(4, 2, ["num"], concat_copy)
assert "expects: 4 rows returned from 'per_batch_map', got: 2" in test_exceptions_config(4, 4, ["num"], shrink_copy)
if __name__ == '__main__':