forked from mindspore-Ecosystem/mindspore

commit f74b5cea0c (parent 2384f31ad0)

    optimize datasetops error msg

@@ -90,7 +90,7 @@ Status BarrierOp::blockCond() {
   {
     py::gil_scoped_acquire gil_acquire;
     if (Py_IsInitialized() == 0) {
-      return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
+      return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
     }
     // we have condition name, however the flexibility is in python today
     try {
@@ -99,7 +99,8 @@ Status BarrierOp::blockCond() {
       // Process the return value
       if (!py::isinstance<py::bool_>(ret_py_obj)) {
         return Status(StatusCode::kMDPyFuncException,
-                      "Invalid parameter, condition wait function should return true/false.");
+                      "Invalid parameter, condition wait function should return boolean, but got " +
+                        std::string(ret_py_obj.get_type().str()));
       }
     } catch (const py::error_already_set &e) {
       return Status(StatusCode::kMDPyFuncException, e.what());
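Both BarrierOp hunks follow the same pybind11 pattern: check the interpreter state, verify the callback's return type with py::isinstance before casting, and put the observed Python type name into the message. A minimal standalone sketch of that pattern (the helper name ValidateBoolReturn is illustrative, not part of this commit):

#include <pybind11/pybind11.h>
#include <stdexcept>
#include <string>

namespace py = pybind11;

// Verify a Python callback returned a bool; on mismatch, report the actual
// Python type the same way the new BarrierOp message does.
bool ValidateBoolReturn(const py::object &ret) {
  if (!py::isinstance<py::bool_>(ret)) {
    throw std::runtime_error("condition wait function should return boolean, but got " +
                             std::string(py::str(ret.get_type())));
  }
  return ret.cast<bool>();
}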
@@ -134,7 +134,7 @@ Status BatchOp::operator()() {
   if ((num_workers_ > 1 || batch_map_func_) && GetMemoryUsage() > MAX_MEMORY_USAGE_THRESHOLD) {
     MS_LOG(WARNING) << "Memory consumption is more than " << (GetMemoryUsage() * 100) << "%, "
                     << "which may cause oom error. Please reduce num_parallel_workers size / "
-                    << "optimize per_batch_map function / other python data preprocess function to "
+                    << "optimize 'per_batch_map' function / other python data preprocess function to "
                     << "reduce memory usage.";
   }
 #endif
@@ -203,8 +203,9 @@ Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, TensorRow *d
         first_shape.Print(shape1);
         old_tensor->shape().Print(shape2);
         RETURN_STATUS_UNEXPECTED(
-          "Invalid data, batch operation expect same shape for each data row, but got inconsistent shape in column " +
-          std::to_string(i) + " expected shape for this column is:" + shape1.str() + ", got shape:" + shape2.str());
+          "Inconsistent batch shapes, batch operation expect same shape for each data row, "
+          "but got inconsistent shape in column " +
+          std::to_string(i) + ", expected shape for this column is:" + shape1.str() + ", got shape:" + shape2.str());
       }
     }
   } else {  // handle string column differently
@@ -300,7 +301,7 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
     CHECK_FAIL_RETURN_UNEXPECTED(num_rows == out_cols[i].size(),
                                  "Invalid data, column: " + out_col_names_[i] +
                                    " expects: " + std::to_string(num_rows) +
-                                   " rows returned from per_batch_map, got: " + std::to_string(out_cols[i].size()));
+                                   " rows returned from 'per_batch_map', got: " + std::to_string(out_cols[i].size()));
     for (auto &t_row : *out_q_table) {
       t_row[col_id] = out_cols[i][row_id++];
     }
@@ -339,14 +340,16 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
     *batch_size = size.cast<int32_t>();
     if (*batch_size <= 0) {
       return Status(StatusCode::kMDPyFuncException,
-                    "Invalid parameter, batch_size function should return an integer greater than 0, but got: " +
+                    "Invalid batch_size function, 'batch_size' function should return an integer greater than 0, "
+                    "but got: " +
                       std::to_string(*batch_size));
     }
   } catch (const py::error_already_set &e) {
     return Status(StatusCode::kMDPyFuncException, e.what());
   } catch (const py::cast_error &e) {
-    return Status(StatusCode::kMDPyFuncException,
-                  "Invalid parameter, batch_size function should return an integer greater than 0.");
+    return Status(
+      StatusCode::kMDPyFuncException,
+      "Invalid batch_size function, the return value of batch_size function cast failed: " + std::string(e.what()));
   }
 }
 return Status(StatusCode::kSuccess, "batch_size function call succeeded.");
@@ -379,11 +382,13 @@ Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBat
     // Parse batch map return value
     py::tuple ret_tuple = py::cast<py::tuple>(ret_py_obj);
     CHECK_FAIL_RETURN_UNEXPECTED(py::isinstance<py::tuple>(ret_tuple),
-                                 "per_batch_map function should return a tuple.");
+                                 "Invalid per_batch_map, 'per_batch_map' function should return a tuple, but got " +
+                                   std::string(ret_py_obj.get_type().str()));
     CHECK_FAIL_RETURN_UNEXPECTED(ret_tuple.size() == out_col_names_.size(),
-                                 "Incorrect number of columns returned in per_batch_map function. Expects: " +
+                                 "Invalid per_batch_map, the number of columns returned in 'per_batch_map' function "
+                                 "should be " +
                                    std::to_string(out_col_names_.size()) +
-                                   " got: " + std::to_string(ret_tuple.size()));
+                                   " , but got: " + std::to_string(ret_tuple.size()));
     for (size_t i = 0; i < ret_tuple.size(); i++) {
       TensorRow output_batch;
       // If user returns a type that is neither a list nor an array, issue a error msg.
@@ -405,7 +410,8 @@ Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBat
     return Status(StatusCode::kMDPyFuncException, e.what());
   } catch (const py::cast_error &e) {
     return Status(StatusCode::kMDPyFuncException,
-                  "Invalid parameter, per_batch_map function of batch should return a tuple of list of numpy array.");
+                  "Invalid per_batch_map, the return value of 'per_batch_map' function cast to py::tuple failed: " +
+                    std::string(e.what()));
   }
 }
 return Status::OK();
@@ -432,7 +438,7 @@ Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo &
     if (pad_shapes[col_id].empty()) pad_shapes[col_id] = max_shapes[col_id];  // fill pad shape with -1
     CHECK_FAIL_RETURN_UNEXPECTED(
      pad_shapes[col_id].size() == max_shapes[col_id].size(),
-      "Invalid data, rank of pad_shape must be equal to rank of specified column. pad_shapes rank:" +
+      "Invalid pad_info, rank of pad_shape must be equal to rank of specified column. pad_shapes rank:" +
        std::to_string(pad_shapes[col_id].size()) + ", column rank: " + std::to_string(max_shapes[col_id].size()));
   }
 
@@ -482,12 +488,14 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info,
   for (const auto &p : pad_info) {
     auto location = column_name_id_map.find(p.first);
     CHECK_FAIL_RETURN_UNEXPECTED(location != column_name_id_map.end(),
-                                 "Invalid parameter, column name: " + p.first + " does not exist.");
+                                 "Invalid pad_info, column name: " + p.first + " does not exist.");
     auto col_id = static_cast<dsize_t>(location->second);
     CHECK_FAIL_RETURN_UNEXPECTED(
       col_id < pad_vals->size() && col_id < pad_shapes->size(),
-      "Invalid parameter, column id must be less than the size of pad_val and pad_shape, but got: " +
-        std::to_string(col_id));
+      "Invalid pad_info, column name should be match with the size of pad value and pad shape, but got "
+      "column name: " +
+        p.first + ", the size of pad value: " + std::to_string(pad_vals->size()) +
+        " and the size of pad shape: " + std::to_string(pad_shapes->size()) + ".");
     pad_cols->insert(col_id);
     (*pad_vals)[col_id] = p.second.second;              // set pad values
     (*pad_shapes)[col_id] = p.second.first.AsVector();  // empty vector if shape is unknown
@@ -498,8 +506,9 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info,
 
 Status BatchOp::ComputeColMap() {
   CHECK_FAIL_RETURN_UNEXPECTED(child_.size() == 1,
-                               "Invalid data, batch operator can't be used as a single operator, "
-                               "should be preceded by an operator that reads data, for example, ImageFolderDataset.");
+                               "Invalid batch, batch operator can't be used as a single operator, "
+                               "should be preceded by an operator that reads data, for example, "
+                               "ds1 = ds.ImageFolderDataset().batch().");
   CHECK_FAIL_RETURN_UNEXPECTED(!(child_[0]->column_name_id_map().empty()),
                                "Invalid data, the column of the previous operator of the batch cannot be empty.");
 
@@ -514,7 +523,7 @@ Status BatchOp::ComputeColMap() {
   // check all input columns exist
   for (const auto &col : in_col_names_) {
     CHECK_FAIL_RETURN_UNEXPECTED(child_map_.find(col) != child_map_.end(),
-                                 "Invalid parameter, col:" + col + " doesn't exist in dataset.");
+                                 "Invalid input_columns, '" + col + "' of 'input_columns' doesn't exist.");
   }
 
   // following logic deals with per_batch_map
@@ -551,8 +560,21 @@ Status BatchOp::ComputeColMap() {
     }
   }
 
-  CHECK_FAIL_RETURN_UNEXPECTED(column_name_id_map_.size() == (child_map_no_in_col.size() + out_col_names_.size()),
-                               "Key error in column_name_id_map_. output_columns in batch is not set correctly!");
+  if (column_name_id_map_.size() != (child_map_no_in_col.size() + out_col_names_.size())) {
+    const std::string prefix_str = std::string("[");
+    auto column_no_in_col = std::accumulate(
+      child_map_no_in_col.begin(), child_map_no_in_col.end(), prefix_str,
+      [](const std::string &str, const std::pair<std::string, int32_t> &p) { return str + p.first + ","; });
+    column_no_in_col += "]";
+    auto column_out =
+      std::accumulate(out_col_names_.begin(), out_col_names_.end(), prefix_str,
+                      [](const std::string &str, const std::string &out_col) { return str + out_col + ","; });
+    column_out += "]";
+    RETURN_STATUS_UNEXPECTED(
+      "Invalid output_columns, columns that are not involved in 'per_batch_map' should not be "
+      "in output_columns, but got columns that are not in input_columns: " +
+      column_no_in_col + ", output_columns: " + column_out + ".");
+  }
   return Status::OK();
 }
 
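The new ComputeColMap() error path builds readable "[col1,col2,]" lists with std::accumulate and a lambda. The same idiom in isolation, assuming nothing beyond the standard library:

#include <iostream>
#include <numeric>
#include <string>
#include <vector>

// Join names into the "[a,b,]" form used by the new BatchOp message
// (the trailing comma matches the accumulate idiom in the diff).
std::string JoinColumnNames(const std::vector<std::string> &cols) {
  std::string joined =
    std::accumulate(cols.begin(), cols.end(), std::string("["),
                    [](const std::string &acc, const std::string &name) { return acc + name + ","; });
  return joined + "]";
}

int main() {
  std::cout << JoinColumnNames({"image", "label"}) << std::endl;  // prints [image,label,]
  return 0;
}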
@@ -108,7 +108,7 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T
   for (size_t i = 0; i < number_of_arguments; i++) {
     auto map_item = column_name_id_map_.find(length_dependent_columns_[i]);
     if (map_item == column_name_id_map_.end()) {
-      RETURN_STATUS_UNEXPECTED("BucketBatchByLength: Couldn't find the specified column(" +
+      RETURN_STATUS_UNEXPECTED("Invalid column, BucketBatchByLength couldn't find the specified column(" +
                               length_dependent_columns_[i] + ") in the dataset.");
     }
     int32_t column_index = map_item->second;
@@ -118,7 +118,8 @@ Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, T
     RETURN_IF_NOT_OK(output.at(0)->GetItemAt(out_element_length, {0}));
     if (*out_element_length < 0) {
       RETURN_STATUS_UNEXPECTED(
-        "Invalid parameter, element_length_function must return an integer greater than or equal to 0, but got" +
+        "Invalid element_length_function, element_length_function must return an integer greater than or equal to 0, "
+        "but got" +
         std::to_string(*out_element_length));
     }
   } else {
@@ -139,7 +140,8 @@ Status BucketBatchByLengthOp::PadAndBatchBucket(int32_t bucket_index, int32_t ba
     if (pad_shape[i] == TensorShape::kDimUnknown) {
       if (bucket_index + 1 >= bucket_boundaries_.size()) {
         std::string error_message =
-          "Invalid data, requested to pad to bucket boundary, element falls in last bucket.";
+          "Invalid data, requested to pad to bucket boundary failed, bucket index should be less than " +
+          std::to_string(bucket_boundaries_.size()) + ", but got " + std::to_string(bucket_index);
         return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, error_message);
       }
 
@@ -41,7 +41,8 @@ BuildSentencePieceVocabOp::BuildSentencePieceVocabOp(std::shared_ptr<SentencePie
 
 Status BuildSentencePieceVocabOp::operator()() {
   if (tree_ == nullptr) {
-    return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set.");
+    return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
+                  "[Internal ERROR] Pipeline init failed, Execution tree not set.");
   }
   RETURN_IF_NOT_OK(sentence_queue_->Register(tree_->AllTasks()));
   RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask(
@@ -58,7 +59,7 @@ Status BuildSentencePieceVocabOp::operator()() {
       RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
     }
     RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
-    CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no operator should be after from_dataset (repeat detected)");
+    CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "'build_sentencepiece_vocab' does not support 'repeat'.)");
     eoe_warning = true;
   }
   // add empty tensorRow for quit
@@ -71,13 +72,13 @@ Status BuildSentencePieceVocabOp::SentenceThread() {
   TaskManager::FindMe()->Post();
   if (col_names_.empty() == true) {
     auto itr = column_name_id_map_.find("text");
-    CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(),
-                                 "Invalid data, 'text' column does not exist in dataset.");
+    CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid data, 'text' column does not exist.");
     col_id_ = itr->second;
   } else {
     auto itr = column_name_id_map_.find(col_names_[0]);
-    CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(),
-                                 "Invalid parameter, column name: " + col_names_[0] + " does not exist in dataset.");
+    CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid column, column name: " + col_names_[0] +
                                                                      " does not exist, check existed "
+                                                                     "column with dataset API 'get_col_names'");
     col_id_ = itr->second;
   }
   std::unique_ptr<DatasetSentenceIterator> sentence_iter = std::make_unique<DatasetSentenceIterator>(this);
@@ -89,7 +90,7 @@ Status BuildSentencePieceVocabOp::SentenceThread() {
   } else {
     if (vocab_ == nullptr) {
       return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
-                    "Invalid parameter, SentencePiece vocab not set.");
+                    "[Internal ERROR] SentencePiece vocab should not be null.");
     }
     vocab_->set_model_proto(model_proto);
   }
@@ -131,7 +132,7 @@ bool BuildSentencePieceVocabOp::Done() { return read_done_; }
 
 void BuildSentencePieceVocabOp::Next(std::string *sentence) {
   if (sentence == nullptr) {
-    MS_LOG(ERROR) << "BuildSentencePieceVocab get nullptr element, please check data.";
+    MS_LOG(ERROR) << "[Internal ERROR] BuildSentencePieceVocab get nullptr element, please check data.";
     return;
   }
   TensorRow new_row;
@@ -151,8 +152,8 @@ void BuildSentencePieceVocabOp::Next(std::string *sentence) {
   if (new_row[col_id_]->type().IsNumeric() || new_row[col_id_]->Rank() > 1) {
     ret_status_ =
       Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
-             "Invalid data, build_sentence_piece_vocab only works on string data with rank equal to 1, got type: " +
-               new_row[col_id_]->type().ToString() + "and rank: " + std::to_string(new_row[col_id_]->Rank()));
+             "Invalid data, build_sentence_piece_vocab only supports string data with rank equal to 1, but got type: " +
+               new_row[col_id_]->type().ToString() + ", rank: " + std::to_string(new_row[col_id_]->Rank()));
     read_done_ = true;
     return;
   }
@@ -69,7 +69,9 @@ class BuildSentencePieceVocabOp : public PipelineOp {
 
   Status operator()() override;
 
-  Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildSentencePieceVocabOp"); }
+  Status Reset() override {
+    RETURN_STATUS_UNEXPECTED("[Internal ERROR] Reset shouldn't be called in BuildSentencePieceVocabOp.");
+  }
 
   std::string Name() const override { return kBuildSentencePieceVocabOp; }
 
@@ -54,7 +54,8 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) {
   while (!new_row.empty()) {
     for (int32_t col : col_ids_) {
       CHECK_FAIL_RETURN_UNEXPECTED(!new_row[col]->type().IsNumeric(),
-                                   "Invalid data, build_vocab only works on string data, but got numeric data type: " +
+                                   "Invalid datatype, 'build_vocab' only supports string type of input, but got "
+                                   "numeric type: " +
                                      new_row[col]->type().ToString());
       for (auto itr = new_row[col]->begin<std::string_view>(); itr != new_row[col]->end<std::string_view>(); ++itr) {
         (*wrkr_map)[std::string(*itr)] += 1;
@@ -79,7 +80,8 @@ Status BuildVocabOp::WorkerEntry(int32_t worker_id) {
 Status BuildVocabOp::operator()() {
   // launch the collector thread
   if (tree_ == nullptr) {
-    return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set.");
+    return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
+                  "[Internal ERROR] Pipeline init failed, Execution tree not set.");
   }
   RETURN_IF_NOT_OK(distributor_queue_->Register(tree_->AllTasks()));
   RETURN_IF_NOT_OK(collector_queue_->Register(tree_->AllTasks()));
@@ -96,8 +98,9 @@ Status BuildVocabOp::operator()() {
     col_ids_.reserve(col_names_.size());
     for (std::string col : col_names_) {
       auto itr = column_name_id_map_.find(col);
-      CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(),
-                                   "Invalid parameter, column name: " + col + " does not exist in dataset.");
+      CHECK_FAIL_RETURN_UNEXPECTED(itr != column_name_id_map_.end(), "Invalid column name, column name: " + col +
                                                                       " does not exist, check existed column "
+                                                                      "with dataset API 'get_col_names'");
       col_ids_.push_back(itr->second);
     }
   } else {
@@ -113,7 +116,8 @@ Status BuildVocabOp::operator()() {
       RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
     }
     RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
-    CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning, "no operator should be after from_dataset (repeat detected)");
+    CHECK_FAIL_RETURN_UNEXPECTED(!eoe_warning,
+                                 "Invalid repeat operator, BuildVocab does not support 'repeat' operator.");
     eoe_warning = true;
   }
 
@@ -137,7 +141,8 @@ Status BuildVocabOp::CollectorThread() {
       ++num_quited_worker;
     }
   }  // all frequencies are obtained
-  CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(), "Invalid data, there are no words in the dataset.");
+  CHECK_FAIL_RETURN_UNEXPECTED(!word_cnt_.empty(),
+                               "Invalid data, BuildVocab load data failed that no words found in vocab, check vocab.");
   std::vector<std::string> words;
   // make sure enough is reserved, this will become a partially sorted list eventually
   words.reserve(wrkr_map->size());
@@ -158,7 +163,7 @@ Status BuildVocabOp::CollectorThread() {
   }
 
   CHECK_FAIL_RETURN_UNEXPECTED(err_msg.empty(),
-                               "Invalid data, these special words are already in the dataset: " + err_msg + ".");
+                               "Invalid special words, these special words are already in the vocab: " + err_msg + ".");
 
   int64_t num_words = std::min(static_cast<int64_t>(words.size()), top_k_);
   if (num_words == 0) {
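BuildVocabOp's workers iterate string tensors via std::string_view and tally frequencies into per-worker maps that a collector thread later merges, which is why the revised messages can say whether the vocab ended up empty. A sketch of the per-worker tally, over a plain vector instead of a Tensor:

#include <cstdint>
#include <map>
#include <string>
#include <string_view>
#include <vector>

// Per-worker word counting: each worker owns its map, so no locking is
// needed until the collector merges the maps.
std::map<std::string, int64_t> CountWords(const std::vector<std::string_view> &words) {
  std::map<std::string, int64_t> counts;
  for (std::string_view w : words) {
    counts[std::string(w)] += 1;
  }
  return counts;
}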
@@ -66,7 +66,7 @@ class BuildVocabOp : public ParallelOp<TensorRow, TensorRow> {
 
   Status operator()() override;
 
-  Status Reset() override { RETURN_STATUS_UNEXPECTED("Reset shouldn't be called in BuildVocabOp"); }
+  Status Reset() override { RETURN_STATUS_UNEXPECTED("[Internal ERROR] Reset shouldn't be called in BuildVocabOp"); }
 
  private:
  const int32_t interval_;
@@ -191,7 +191,7 @@ Status CacheBase::FetchFromCache(int32_t worker_id) {
       if (AllowCacheMiss()) {
         ++num_cache_miss_;
       } else {
-        std::string errMsg = "Row id " + std::to_string(row_id) + " not found.";
+        std::string errMsg = "[Internal ERROR] Row id " + std::to_string(row_id) + " not found.";
         RETURN_STATUS_UNEXPECTED(errMsg);
       }
     }
@@ -225,7 +225,8 @@ Status CacheBase::UpdateColumnMapFromCache() {
 
 Status CacheBase::GetPrefetchRow(row_id_type row_id, TensorRow *out) {
   RETURN_UNEXPECTED_IF_NULL(out);
-  CHECK_FAIL_RETURN_UNEXPECTED(row_id >= 0, "Expect positive row id, but got:" + std::to_string(row_id));
+  CHECK_FAIL_RETURN_UNEXPECTED(row_id >= 0,
+                               "[Internal ERROR] Expect positive row id, but got:" + std::to_string(row_id));
   RETURN_IF_NOT_OK(prefetch_.PopFront(row_id, out));
   return Status::OK();
 }
@@ -278,7 +279,7 @@ Status CacheBase::Prefetcher(int32_t worker_id) {
     cache_miss.clear();
     std::unique_ptr<IOBlock> blk;
     RETURN_IF_NOT_OK(prefetch_queues_[worker_id]->PopFront(&blk));
-    CHECK_FAIL_RETURN_UNEXPECTED(!blk->eof(), "Expect eoe or a regular io block.");
+    CHECK_FAIL_RETURN_UNEXPECTED(!blk->eof(), "[Internal ERROR] Expect eoe or a regular io block.");
     if (!blk->eoe()) {
       RETURN_IF_NOT_OK(blk->GetKeys(&prefetch_keys));
       Status rc;
@@ -29,7 +29,7 @@ namespace dataset {
 Status CacheLookupOp::operator()() {
   if (!sampler_) {
     return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
-                  "Invalid parameter, CacheLookupOp requires a sampler before it can be executed, but got nullptr.");
+                  "Invalid sampler, Cache requires a sampler before it can be executed, but got nullptr.");
   }
   RETURN_IF_NOT_OK(RegisterResources());
 
@@ -147,7 +147,8 @@ Status CacheMergeOp::CacheMissWorkerEntry(int32_t workerId) {
     } else {
       row_id_type row_id = new_row.getId();
       if (row_id < 0) {
-        std::string errMsg = "Expect positive row id, but got: " + std::to_string(row_id);
+        std::string errMsg =
+          "[Internal ERROR] row id should be greater than or equal to 0, but got: " + std::to_string(row_id);
         RETURN_STATUS_UNEXPECTED(errMsg);
       }
       if (cache_missing_rows_) {
@@ -213,7 +214,8 @@ Status CacheMergeOp::PrepareOperator() {  // Run any common code from super clas
   // specific logic
   CHECK_FAIL_RETURN_UNEXPECTED(
     child_.size() == kNumChildren,
-    "Incorrect number of children of CacheMergeOp, required num is 2, but got:" + std::to_string(child_.size()));
+    "[Internal ERROR] Incorrect number of children of CacheMergeOp, required num is 2, but got:" +
+      std::to_string(child_.size()));
   RETURN_IF_NOT_OK(DatasetOp::PrepareOperator());
   // Get the computed check sum from all ops in the cache miss class
   uint32_t cache_crc = DatasetOp::GenerateCRC(child_[kCacheMissChildIdx]);
@@ -231,7 +233,7 @@ Status CacheMergeOp::PrepareOperator() {  // Run any common code from super clas
 }
 
 Status CacheMergeOp::ComputeColMap() {
-  CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "Invalid data, cache miss stream is empty.");
+  CHECK_FAIL_RETURN_UNEXPECTED(child_[kCacheMissChildIdx] != nullptr, "[Internal ERROR] cache miss stream is empty.");
   if (column_name_id_map().empty()) {
     column_name_id_map_ = child_[kCacheMissChildIdx]->column_name_id_map();
   }
@@ -270,7 +272,7 @@ Status CacheMergeOp::GetRq(row_id_type row_id, CacheMergeOp::TensorRowCacheReque
       RETURN_IF_NOT_OK(mem.allocate(1));
       *out = mem.GetMutablePointer();
     } else {
-      RETURN_STATUS_UNEXPECTED("Invalid data, map insert fail.");
+      RETURN_STATUS_UNEXPECTED("[Internal ERROR] map insert fail.");
     }
   }
   return Status::OK();
@@ -43,7 +43,7 @@ Status CacheOp::operator()() {
   RETURN_UNEXPECTED_IF_NULL(tree_);
   if (!sampler_) {
     return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
-                  "Invalid parameter, CacheOp requires a sampler before it can be executed, but got nullptr.");
+                  "Invalid sampler, CacheOp requires a sampler before it can be executed, but got nullptr.");
   }
   RETURN_IF_NOT_OK(RegisterResources());
 
@@ -145,9 +145,9 @@ Status CacheOp::WaitForCachingAllRows() {
       BuildPhaseDone = true;
       break;
     case CacheServiceState::kOutOfMemory:
-      return Status(StatusCode::kMDOutOfMemory, "Cache server is running out of memory");
+      return Status(StatusCode::kMDOutOfMemory, "Cache server is running out of memory, check memory usage.");
     case CacheServiceState::kNoSpace:
-      return Status(StatusCode::kMDNoSpace, "Cache server is running of out spill storage");
+      return Status(StatusCode::kMDNoSpace, "Cache server is running of out spill storage, check memory usage.");
     case CacheServiceState::kNone:
     case CacheServiceState::kError:
     default:
@@ -74,9 +74,17 @@ Status ConcatOp::Verify(int32_t id, const TensorRow &new_row) {
     // Compare the data type and data rank with these in child[0]
     int32_t index = 0;
     for (auto item : new_row) {
-      if ((item->type() != data_type_[index]) || item->Rank() != data_rank_[index++]) {
-        RETURN_STATUS_UNEXPECTED("Invalid data, data type or data rank is not the same with previous dataset.");
-      }
+      if (item->type() != data_type_[index]) {
+        RETURN_STATUS_UNEXPECTED(
+          "Invalid datatype, the data type of two datasets concated should be the same, but got " +
+          item->type().ToString() + " and " + data_type_[index].ToString() + ".");
+      }
+      if (item->Rank() != data_rank_[index]) {
+        RETURN_STATUS_UNEXPECTED(
+          "Invalid datatype, the data rank of two datasets concated should be the same, but got " +
+          std::to_string(item->Rank()) + " and " + std::to_string(data_rank_[index]) + ".");
+      }
+      index++;
     }
   }
   verified_ = true;
@@ -89,12 +97,13 @@ Status ConcatOp::ComputeColMap() {
     // Obtain columns_name_id_map from child_[0]
     column_name_id_map_ = child_[0]->column_name_id_map();
     if (column_name_id_map_.empty()) {
-      RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
+      RETURN_STATUS_UNEXPECTED("[Internal ERROR] Child column name map cannot be empty!");
     }
     // Verify all children have the same column name map
     for (size_t i = 0; i < child_.size(); ++i) {
       if (child_[i]->column_name_id_map() != column_name_id_map_) {
-        RETURN_STATUS_UNEXPECTED("Invalid data, column name or column order is not the same with previous dataset.");
+        RETURN_STATUS_UNEXPECTED(
+          "Invalid columns, 'column name' or 'column order' of concat datasets should be the same.");
       }
     }
   } else {
@@ -118,7 +127,7 @@ Status ConcatOp::GetNumClasses(int64_t *num_classes) {
   *num_classes = max_num_classes;
   return Status::OK();
 }
-Status ConcatOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
+Status ConcatOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ConcatOp is an inlined operator."); }
 
 bool ConcatOp::IgnoreSample() {
   bool is_not_mappable_or_second_ne_zero = true;
@@ -184,10 +193,10 @@ Status ConcatOp::GetNextRow(TensorRow *row) {
     return Status::OK();
   }
   if (row->eof()) {
-    CHECK_FAIL_RETURN_UNEXPECTED(cur_child_ == 0, "Received an unexpected EOF.");
+    CHECK_FAIL_RETURN_UNEXPECTED(cur_child_ == 0, "[Internal ERROR] Received an unexpected EOF.");
     for (int32_t i = cur_child_ + 1; i < child_.size(); i++) {
       RETURN_IF_NOT_OK(child_[i]->GetNextRow(row));
-      CHECK_FAIL_RETURN_UNEXPECTED(row->eof(), "Row must be an EOF.");
+      CHECK_FAIL_RETURN_UNEXPECTED(row->eof(), "[Internal ERROR] Row must be an EOF.");
     }
     return Status::OK();
   }
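Splitting the old compound condition in Verify() lets each failure name exactly what differed, and hoisting index++ out of the condition removes an easy-to-misread side effect buried inside a short-circuiting ||. The shape of the new check, sketched with plain strings standing in for MindSpore's DataType class:

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// Stand-in for the per-column schema ConcatOp verifies; 'type' is a plain
// string here, not the real DataType class.
struct ColumnDesc {
  std::string type;
  size_t rank;
};

// Report type and rank mismatches separately so each message can show both
// the observed and the expected value, as the revised Verify() does.
void VerifySameSchema(const std::vector<ColumnDesc> &expected, const std::vector<ColumnDesc> &row) {
  for (size_t i = 0; i < row.size() && i < expected.size(); ++i) {
    if (row[i].type != expected[i].type) {
      throw std::runtime_error("the data type of two datasets concated should be the same, but got " +
                               row[i].type + " and " + expected[i].type + ".");
    }
    if (row[i].rank != expected[i].rank) {
      throw std::runtime_error("the data rank of two datasets concated should be the same, but got " +
                               std::to_string(row[i].rank) + " and " + std::to_string(expected[i].rank) + ".");
    }
  }
}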
@@ -63,7 +63,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) {
   }
   if (operator_id_ == kInvalidOperatorId) {
     std::string err_msg(
-      "Cannot add child node. Tree node connections can only "
+      "[Internal ERROR] Cannot add child node. Tree node connections can only "
       "be made if the node belongs to a tree.");
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
@@ -71,7 +71,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) {
   // disallow relationships with other trees
   if (tree_ != child->tree_) {
     std::string err_msg(
-      "Cannot add child node. Tree node connections can only be made if both nodes belong to the same tree.");
+      "Invalid operator structure, the relationship of operators should be one by one, but got too many branches.");
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
   child_.push_back(child);
@@ -82,7 +82,7 @@ Status DatasetOp::AddChild(std::shared_ptr<DatasetOp> child) {
 Status DatasetOp::RemoveChild(std::shared_ptr<DatasetOp> child) {
   if (operator_id_ == kInvalidOperatorId) {
     std::string err_msg(
-      "Cannot remove child node. Tree node connections can only "
+      "[Internal ERROR] Cannot remove child node. Tree node connections can only "
       "be made if the node belongs to a tree.");
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
@@ -90,7 +90,7 @@ Status DatasetOp::RemoveChild(std::shared_ptr<DatasetOp> child) {
   // disallow relationships with other trees
   if (tree_ != child->tree_) {
     std::string err_msg(
-      "Cannot remove child node. Tree node connections can only be made if both nodes belong to the same tree.");
+      "Invalid operator structure, the relationship of operators should be one by one, but got too many branches.");
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
 
@@ -132,11 +132,15 @@ void DatasetOp::RemoveParent(const DatasetOp *parent) {
 // Removes this node from the tree and connects it's parent/child together
 Status DatasetOp::Remove() {
   if (parent_.size() > 1) {
-    std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
+    std::string err_msg(
+      "Invalid operator structure, the relationship between operators should be one-to-one, but encountered more than "
+      "one parent, namely: " +
+      std::to_string(parent_.size()));
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
   if (child_.size() > 1) {
-    std::string err_msg("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
+    std::string err_msg(
+      "Invalid operator structure, the relationship of operators should be one by one, but got too many branches.");
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
 
@@ -151,7 +155,8 @@ Status DatasetOp::Remove() {
   // If we have a parent, then assign child's parent to point to our parent.
   if (!parent_.empty()) {
     CHECK_FAIL_RETURN_UNEXPECTED(parent_[0]->Children().size() == 1,
-                                 "Removing a node whose parent has more than 1 child is not supported.");
+                                 "Invalid operator structure, the relationship of operators should be one by one, "
+                                 "but got too many branches.");
     child_[0]->parent_[0] = parent_[0];
   } else {
     // We don't have a parent, so we are the root node being removed.
@@ -293,7 +298,8 @@ Status DatasetOp::GetClassIndexing(std::vector<std::pair<std::string, std::vecto
     return child_[child_.size() - 1]->GetClassIndexing(output_class_indexing);
   } else {
     *output_class_indexing = {};
-    RETURN_STATUS_UNEXPECTED("Trying to get class index from leaf node, missing override.");
+    RETURN_STATUS_UNEXPECTED("Unsupported scenario, GetClassIndexing failed for " + Name() +
+                             " doesn't support GetClassIndexing yet.");
   }
 }
 
@@ -343,12 +349,14 @@ std::string DatasetOp::ColumnNameMapAsString() const {
 // Operations changing the column map must overwrite this function.
 Status DatasetOp::ComputeColMap() {
   if (child_.size() > 1) {
-    RETURN_STATUS_UNEXPECTED("[Internal ERROR], no support for the relationship between operators is not one-to-one.");
+    RETURN_STATUS_UNEXPECTED(
+      "Invalid operator structure, the relationship of operators should be one by one, but got too many branches.");
   }
   if (column_name_id_map_.empty()) {
     column_name_id_map_ = child_[0]->column_name_id_map();
     if (column_name_id_map_.empty()) {
-      RETURN_STATUS_UNEXPECTED("Child column name map cannot be empty!");
+      RETURN_STATUS_UNEXPECTED("Invalid column list, the column list of " + child_[0]->Name() +
+                               " should have one column at least, but got empty.");
     }
     MS_LOG(DEBUG) << "Setting column map:\n" << DatasetOp::ColumnNameMapAsString();
   } else {
@@ -119,8 +119,8 @@ Status DeviceQueueOp::FilterMetadata(TensorRow *row) {
 Status DeviceQueueOp::CheckExceptions(const TensorRow &row) const {
   // this method checks if the row meets the conditions to be sent to TDT
   for (const auto &item : row) {
-    CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid data, cannot send string tensor to device.");
-    CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, cannot send tensor with no data to device.");
+    CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Invalid datatype, cannot send string data to device.");
+    CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Invalid data, the data send to device is null.");
   }
   return Status::OK();
 }
@@ -152,7 +152,8 @@ Status DeviceQueueOp::operator()() {
     }
   }
   if (tdtInstancePtr->acl_handle_ == nullptr) {
-    RETURN_STATUS_UNEXPECTED("Create channel for sending data failed, please check DEVICE ID setting.");
+    RETURN_STATUS_UNEXPECTED(
+      "[Internal ERROR] Create channel for sending data failed, please check DEVICE ID setting.");
   }
   RETURN_IF_NOT_OK(SendDataToAscend());
 #endif
@@ -343,7 +344,8 @@ Status DeviceQueueOp::SendRowToTdt(TensorRow curr_row, bool is_profiling_enable,
 #ifdef ENABLE_TDTQUE
 Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
   if (!create_data_info_queue_) {
-    return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "DataInfo queue is not created.");
+    return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
+                  "[Internal ERROR] DataInfo queue is not created.");
   }
   // This place has a race condition with operator(), so the first one
   // arrive here will do the initialize work.
@@ -359,7 +361,7 @@ Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
 }
 #else
 Status DeviceQueueOp::GetDataInfo(DATA_INFO *data_info) {
-  return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "GetDataInfo is not supported yet.");
+  return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "'GetDataInfo' only supported on Ascend.");
 }
 #endif
 
@@ -446,7 +448,7 @@ Status DeviceQueueOp::PushDataToGPU() {
     if (!ps::PsDataPrefetch::GetInstance().PrefetchData(channel_name_, items[0].data_ptr_, items[0].data_len_,
                                                         items[0].data_type_)) {
       return Status(StatusCode::kMDTimeOut, __LINE__, __FILE__,
-                    "Failed to prefetch data in current PS mode(cache data when sending).");
+                    "[Internal ERROR] Failed to prefetch data in current PS mode(cache data when sending).");
     }
     RETURN_IF_NOT_OK(RetryPushData(handle, items));
 #ifndef ENABLE_SECURITY
@@ -623,18 +625,19 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items,
   for (auto &sub_item : *items) {
     auto rc = pool_[worker_id]->Allocate(sub_item.data_len_, &sub_item.data_ptr_);
     if (rc.IsError() || sub_item.data_ptr_ == nullptr) {
-      return Status(StatusCode::kMDOutOfMemory, __LINE__, __FILE__, "Memory malloc failed.");
+      return Status(StatusCode::kMDOutOfMemory, __LINE__, __FILE__, "Memory malloc failed, check memory usage.");
     }
     if (curr_row[i] == nullptr) {
-      MS_LOG(ERROR) << "The pointer curr_row[" << i << "] is null";
-      return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "TensorRow 'curr_row' contains nullptr.");
+      MS_LOG(ERROR) << "[Internal ERROR] The pointer curr_row[" << i << "] is null";
+      return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
+                    "[Internal ERROR] TensorRow 'curr_row' contains nullptr.");
    }
     sub_item.data_type_ = curr_row[i]->type().ToString();
     const unsigned char *column_data = curr_row[i]->GetBuffer();
     if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data,
                  static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) {
-      MS_LOG(ERROR) << "memcpy_s failed!";
-      return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "memcpy failed when using memcpy_s do copy.");
+      MS_LOG(ERROR) << "[Internal ERROR] memcpy_s failed.";
+      return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "[Internal ERROR] memcpy_s failed.");
     }
   }
 
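MallocForGPUData allocates staging memory per tensor and copies with memcpy_s, the bounds-checked copy from the securec library MindSpore bundles; this hunk mostly reclassifies its failures as internal errors. A portable sketch of the same guarantee using an explicit bounds check before std::memcpy (CheckedCopy is a hypothetical name, not commit code):

#include <cstddef>
#include <cstring>
#include <stdexcept>

// Fail loudly instead of overflowing: verify the pointers and the
// destination size before copying, which is what the memcpy_s call
// in the diff enforces.
void CheckedCopy(void *dst, std::size_t dst_len, const void *src, std::size_t src_len) {
  if (dst == nullptr || src == nullptr || src_len > dst_len) {
    throw std::runtime_error("memcpy failed: null pointer or destination too small");
  }
  std::memcpy(dst, src, src_len);
}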
@@ -43,7 +43,7 @@ void EpochCtrlOp::Print(std::ostream &out, bool show_all) const {
 Status EpochCtrlOp::GetNextRow(TensorRow *row) {
   RETURN_UNEXPECTED_IF_NULL(row);
   if (child_.empty()) {
-    RETURN_STATUS_UNEXPECTED("EpochCtrlOp can't be the leaf node(first operator) of pipeline.");
+    RETURN_STATUS_UNEXPECTED("[Internal ERROR] EpochCtrlOp can't be the leaf node(first operator) of pipeline.");
   }
 
   // `retry_if_eoe` is false because EpochCtrlOp does not eat EOE.
@@ -143,7 +143,7 @@ Status FilterOp::WorkerCompute(const TensorRow &in_row, bool *out_predicate) {
 Status FilterOp::CheckInput(const TensorRow &input) const {
   for (auto &item : input) {
     if (item == nullptr) {
-      RETURN_STATUS_UNEXPECTED("Invalid data, input tensor is null.");
+      RETURN_STATUS_UNEXPECTED("[Internal ERROR] input tensor is null.");
     }
   }
   return Status::OK();
@@ -192,7 +192,7 @@ Status MapOp::WorkerEntry(int32_t worker_id) {
       }
       RETURN_IF_NOT_OK(worker_out_queues_[worker_id]->EmplaceBack(std::move(in_row)));
     } else {
-      CHECK_FAIL_RETURN_UNEXPECTED(in_row.size() != 0, "MapOp got an empty TensorRow.");
+      CHECK_FAIL_RETURN_UNEXPECTED(in_row.size() != 0, "[Internal ERROR] MapOp got an empty TensorRow.");
       TensorRow out_row;
       // Perform the compute function of TensorOp(s) and store the result in new_tensor_table.
       RETURN_IF_NOT_OK(WorkerCompute(in_row, &out_row, job_list));
@@ -244,7 +244,11 @@ Status MapOp::WorkerCompute(const TensorRow &in_row, TensorRow *out_row,
 
   // Sanity check a row in result_table
   if (!result_table.empty() && out_columns_.size() != result_table[0].size()) {
-    RETURN_STATUS_UNEXPECTED("Result of a tensorOp doesn't match output column names");
+    RETURN_STATUS_UNEXPECTED(
+      "Invalid columns, the number of columns returned in 'map' operations should match "
+      "the number of 'output_columns', but got the number of columns returned in 'map' operations: " +
+      std::to_string(result_table[0].size()) +
+      ", the number of 'output_columns': " + std::to_string(out_columns_.size()) + ".");
   }
 
   // Merging the data processed by job (result_table) with the data that are not used.
@@ -299,7 +303,8 @@ Status MapOp::InitPrivateVariable(std::unordered_map<std::string, int32_t> *col_
   if (in_columns_.empty()) {
     auto itr =
       std::find_if(col_name_id_map->begin(), col_name_id_map->end(), [](const auto &it) { return it.second == 0; });
-    CHECK_FAIL_RETURN_UNEXPECTED(itr != col_name_id_map->end(), "Column name id map doesn't have id 0");
+    CHECK_FAIL_RETURN_UNEXPECTED(itr != col_name_id_map->end(),
+                                 "[Internal ERROR] Column name id map doesn't have id 0");
     MS_LOG(INFO) << "Input columns empty for map op, will apply to the first column in the current table.";
     in_columns_.push_back(itr->first);
 
@@ -74,7 +74,7 @@ TensorRow ProjectOp::Project(const TensorRow &row) {
 // However, the ProjectOp is defined as a inlined operator, so it is invalid to launch the
 // functor since this op runs inlined inside another operator. The function is overloaded to
 // ensure that it is not called by mistake (it will generate an error).
-Status ProjectOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. ProjectOp is an inlined operator."); }
+Status ProjectOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ProjectOp is an inlined operator."); }
 
 Status ProjectOp::EoeReceived(int32_t worker_id) {
   state_ = OpState::kDeOpIdle;
@@ -92,7 +92,7 @@ Status ProjectOp::ComputeColMap() {
     for (size_t i = 0; i < columns_to_project_.size(); i++) {
      std::string &current_column = columns_to_project_[i];
       if (child_column_name_mapping.find(current_column) == child_column_name_mapping.end()) {
-        std::string err_msg = "Invalid parameter, column name: " + current_column + " does not exist in dataset.";
+        std::string err_msg = "Invalid column, column name: " + current_column + " does not exist.";
         RETURN_STATUS_UNEXPECTED(err_msg);
       }
       // Setup the new column name mapping for ourself (base class field)
@@ -41,7 +41,7 @@ Status RenameOp::GetNextRow(TensorRow *row) {
   return Status::OK();
 }
 
-Status RenameOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. RenameOp is an inlined operator."); }
+Status RenameOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] RenameOp is an inlined operator."); }
 
 // Rename core functionality to compute the new column name id map.
 // We need to overwrite the super class ComputeColMap here because we're making a modification of the
@@ -71,7 +71,7 @@ Status RenameOp::ComputeColMap() {
       MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << ".";
       if (new_col_name.find(out_columns_[index]) != new_col_name.end()) {
         std::string err_msg(
-          "Invalid parameter, rename operation does not support rename one column name into another already exist "
+          "Invalid column, rename operation does not support rename one column name into another already exist "
          "column name, existing column name is: " +
          out_columns_[index] + ".");
         RETURN_STATUS_UNEXPECTED(err_msg);
@@ -82,7 +82,7 @@ Status RenameOp::ComputeColMap() {
       // not found
       if (new_col_name.find(name) != new_col_name.end()) {
         std::string err_msg(
-          "Invalid parameter, rename operation does not support rename one column name into another already exist "
+          "Invalid column, rename operation does not support rename one column name into another already exist "
          "column name, existing column name is: " +
          name + ".");
         RETURN_STATUS_UNEXPECTED(err_msg);
@@ -95,7 +95,7 @@ Status RenameOp::ComputeColMap() {
   // only checks number of renamed columns have been found, this input check doesn't check everything
   if (found != in_columns_.size()) {
     MS_LOG(DEBUG) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << ".";
-    std::string err_msg = "Invalid parameter, column to be renamed does not exist in dataset.";
+    std::string err_msg = "Invalid column, column to be renamed does not exist.";
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
 
@@ -60,7 +60,7 @@ void RepeatOp::Print(std::ostream &out, bool show_all) const {
 Status RepeatOp::GetNextRow(TensorRow *row) {
   RETURN_UNEXPECTED_IF_NULL(row);
   if (child_.empty()) {
-    RETURN_STATUS_UNEXPECTED("Pipeline init failed, RepeatOp can't be the first op in pipeline.");
+    RETURN_STATUS_UNEXPECTED("[Internal ERROR] Pipeline init failed, RepeatOp can't be the first op in pipeline.");
   }
 
   RETURN_IF_NOT_OK(child_[0]->GetNextRow(row));
@@ -108,7 +108,7 @@ Status RepeatOp::EoeReceived(int32_t worker_id) {
 // However, the RepeatOp is defined as a inlined operator, so it is invalid to launch the
 // functor since this op runs inlined inside another operator. The function is overloaded to
 // ensure that it is not called by mistake (it will generate an error).
-Status RepeatOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. RepeatOp is an inlined operator."); }
+Status RepeatOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] RepeatOp is an inlined operator."); }
 
 // Base-class override for handling cases when an eof is received.
 Status RepeatOp::EofReceived(int32_t worker_id) {
@@ -205,7 +205,8 @@ Status ShuffleOp::InitShuffleBuffer() {
   // rows.
   if (shuffle_buffer_state_ != kShuffleStateInit) {
     return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
-                  "Invalid shuffle buffer state, shuffle buffer should be init first or reset after each epoch.");
+                  "[Internal ERROR] Invalid shuffle buffer state, shuffle buffer should be init first or reset "
+                  "after each epoch.");
   }
 
   // Before we drop into the fetching loop, call the fetch once for the first time
@@ -220,7 +221,7 @@ Status ShuffleOp::InitShuffleBuffer() {
   }
 
   if (new_row.empty()) {
-    RETURN_STATUS_UNEXPECTED("Invalid data, unable to fetch a single row for shuffle buffer.");
+    RETURN_STATUS_UNEXPECTED("[Internal ERROR] Unable to fetch a single row for shuffle buffer.");
   }
 
   // Now fill the rest of the shuffle buffer until we are unable to get the next row or we reached
@@ -43,7 +43,7 @@ void SkipOp::Print(std::ostream &out, bool show_all) const {
   }
 }
 
-Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
+Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] SkipOp is an inlined operator."); }
 
 Status SkipOp::GetNextRow(TensorRow *row) {
   RETURN_UNEXPECTED_IF_NULL(row);
@@ -64,7 +64,7 @@ Status AlbumOp::PrepareData() {
   dirname_offset_ = folder_path_.length();
   std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
   if (!folder.Exists() || dirItr == nullptr) {
-    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_ + ".");
+    RETURN_STATUS_UNEXPECTED("Invalid folder, " + folder_path_ + " does not exist or permission denied.");
   }
   MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << ".";
 
@@ -94,7 +94,7 @@ Status AlbumOp::PrepareData() {
 // This function does not return status because we want to just skip bad input, not crash
 bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
   if (valid == nullptr) {
-    MS_LOG(ERROR) << "Album parameter can't be nullptr.";
+    MS_LOG(ERROR) << "[Internal ERROR] Album parameter can't be nullptr.";
     return false;
   }
   std::ifstream file_handle;
@@ -214,8 +214,8 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n
 
     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
   } else {
-    RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " +
-                             data_schema_->Column(col_num).Type().ToString());
+    RETURN_STATUS_UNEXPECTED("Invalid column type, column type of " + data_schema_->Column(col_num).Name() +
+                             " should be int32 or int64, but got " + data_schema_->Column(col_num).Type().ToString());
   }
   row->push_back(std::move(label));
   return Status::OK();
@@ -243,7 +243,8 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col
 
     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array));
   } else {
-    RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " +
+    RETURN_STATUS_UNEXPECTED("Invalid column type, column type of " + data_schema_->Column(col_num).Name() +
+                             " should be float32 nor float64, but got " +
                              data_schema_->Column(col_num).Type().ToString());
   }
   row->push_back(std::move(float_array));
@@ -323,7 +324,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
 
   std::ifstream file_handle(folder_path_ + file);
   if (!file_handle.is_open()) {
-    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file);
+    RETURN_STATUS_UNEXPECTED("Invalid json file, " + folder_path_ + file + " does not exist or permission denied.");
   }
   std::string line;
   while (getline(file_handle, line)) {
@@ -342,7 +343,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
     }
   } catch (const std::exception &err) {
     file_handle.close();
-    RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file);
+    RETURN_STATUS_UNEXPECTED("Invalid file, " + folder_path_ + file + " load failed: " + std::string(err.what()));
   }
 }
 file_handle.close();
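The Album hunks separate "cannot open" (missing file or permissions) from "cannot parse" (broken content), and forward the parser's own what() text. A hedged sketch of that split using nlohmann::json, which the AlbumOp signatures above already depend on (LoadAlbumRow is an illustrative helper, not commit code):

#include <fstream>
#include <stdexcept>
#include <string>

#include <nlohmann/json.hpp>

// Distinguish open failures from parse failures, mirroring the two revised
// AlbumOp messages.
nlohmann::json LoadAlbumRow(const std::string &path) {
  std::ifstream in(path);
  if (!in.is_open()) {
    throw std::runtime_error("Invalid json file, " + path + " does not exist or permission denied.");
  }
  try {
    return nlohmann::json::parse(in);
  } catch (const std::exception &err) {
    throw std::runtime_error("Invalid file, " + path + " load failed: " + std::string(err.what()));
  }
}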
@@ -60,16 +60,16 @@ Status CelebAOp::ParseAttrFile() {
 
   auto realpath = FileUtils::GetRealPath((folder_path / "list_attr_celeba.txt").ToString().data());
   if (!realpath.has_value()) {
-    MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << (folder_path / "list_attr_celeba.txt").ToString();
-    RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" +
-                             (folder_path / "list_attr_celeba.txt").ToString());
+    MS_LOG(ERROR) << "Invalid file path, " << (folder_path / "list_attr_celeba.txt").ToString() << " does not exist.";
+    RETURN_STATUS_UNEXPECTED("Invalid file path, " + (folder_path / "list_attr_celeba.txt").ToString() +
+                             " does not exist.");
   }
 
   std::ifstream attr_file(realpath.value());
   if (!attr_file.is_open()) {
     std::string attr_file_name = (folder_path / "list_attr_celeba.txt").ToString();
     return Status(StatusCode::kMDFileNotExist, __LINE__, __FILE__,
-                  "Invalid file, failed to open Celeba attr file: " + attr_file_name);
+                  "Invalid attr file, failed to open: " + attr_file_name + ", permission denied.");
   }
 
   attr_file_ = (folder_path / "list_attr_celeba.txt").ToString();
@@ -89,12 +89,11 @@ Status CelebAOp::ParseAttrFile() {
   try {
     num_rows_in_attr_file_ = static_cast<int64_t>(std::stoul(rows_num));  // First line is rows number in attr file
   } catch (std::invalid_argument &e) {
-    RETURN_STATUS_UNEXPECTED(
-      "Invalid data, failed to convert rows_num from attr_file to unsigned long, invalid value: " + rows_num + ".");
+    RETURN_STATUS_UNEXPECTED("Invalid rows_num, failed to convert rows_num: " + rows_num + " to unsigned long in " +
+                             attr_file_ + ".");
   } catch (std::out_of_range &e) {
-    RETURN_STATUS_UNEXPECTED(
-      "Invalid data, failed to convert rows_num from attr_file to unsigned long, value out of range: " + rows_num +
-      ".");
+    RETURN_STATUS_UNEXPECTED("Invalid rows_num, rows_num in " + attr_file_ + " is out of range, rows_num is " +
+                             rows_num + ".");
   }
 
   (void)getline(attr_file, attr_name);  // Second line is attribute name,ignore it
@@ -125,8 +124,8 @@ bool CelebAOp::CheckDatasetTypeValid() {
   Path folder_path(folder_path_);
   partition_file_.open((folder_path / "list_eval_partition.txt").ToString());
   if (!partition_file_.is_open()) {
-    MS_LOG(ERROR) << "Invalid file, fail to open CelebA partition file, path="
-                  << (folder_path / "list_eval_partition.txt").ToString();
+    MS_LOG(ERROR) << "Invalid eval partition file, failed to open eval partition file: "
+                  << (folder_path / "list_eval_partition.txt").ToString() << " does not exist or permission denied.";
     return false;
   }
 }
@@ -140,10 +139,12 @@ bool CelebAOp::CheckDatasetTypeValid() {
   try {
     type = std::stoi(vec[1]);
   } catch (std::invalid_argument &e) {
-    MS_LOG(WARNING) << "Invalid data, failed to convert to int, invalid value: " << vec[1] << ".";
+    MS_LOG(WARNING) << "Invalid number, the second word in list_eval_partition.txt should be numeric, but got: "
+                    << vec[1] << ".";
     return false;
   } catch (std::out_of_range &e) {
-    MS_LOG(WARNING) << "Invalid data, failed to convert to int, value out of range: " << vec[1] << ".";
+    MS_LOG(WARNING) << "Invalid number, the second word in list_eval_partition.txt is out of range, word is: " << vec[1]
+                    << ".";
     return false;
   }
   // train:0, valid=1, test=2
@@ -185,12 +186,11 @@ Status CelebAOp::PrepareData() {
   try {
     value = std::stoi(split[label_index]);
   } catch (std::invalid_argument &e) {
-    RETURN_STATUS_UNEXPECTED("Invalid data, failed to convert item from attr_file to int, corresponding value: " +
-                             split[label_index] + ".");
+    RETURN_STATUS_UNEXPECTED("Invalid label index, the label index in " + file_path.ToString() +
+                             " should be numeric, but got: " + split[label_index] + ".");
   } catch (std::out_of_range &e) {
-    RETURN_STATUS_UNEXPECTED(
-      "Invalid data, failed to convert item from attr_file to int as out of range, corresponding value: " +
-      split[label_index] + ".");
+    RETURN_STATUS_UNEXPECTED("Invalid label index, the label index in " + file_path.ToString() +
+                             " is out of range, index is " + split[label_index] + ".");
   }
   image_labels.second.push_back(value);
 }
@@ -242,7 +242,8 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
     Status rc = Decode(image, &image);
     if (rc.IsError()) {
       image = nullptr;
-      std::string err_msg = "Invalid data, failed to decode image: " + image_path.ToString();
+      std::string err_msg =
+        "Invalid image, " + image_path.ToString() + " decode failed, the image is broken or permission denied.";
      return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
     }
   }
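The CelebA hunks lean on the two distinct exceptions std::stoi/std::stoul throw — std::invalid_argument for non-numeric text and std::out_of_range for values that do not fit — to produce two different messages naming the offending file. The parsing pattern in isolation (ParseLabelIndex is a hypothetical helper):

#include <stdexcept>
#include <string>

// Two catch clauses, two messages: non-numeric input versus numeric input
// that overflows int, each pointing at the file being parsed.
int ParseLabelIndex(const std::string &word, const std::string &file) {
  try {
    return std::stoi(word);
  } catch (const std::invalid_argument &) {
    throw std::runtime_error("the label index in " + file + " should be numeric, but got: " + word);
  } catch (const std::out_of_range &) {
    throw std::runtime_error("the label index in " + file + " is out of range, index is " + word);
  }
}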
@@ -113,7 +113,7 @@ Status CifarOp::ReadCifar10BlockData() {
// check the validity of the file path
Path file_path(file);
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to find cifar10 file: " + file);
"Invalid cifar10 file, " + file + " does not exist or is a directory.");
std::string file_name = file_path.Basename();

if (usage_ == "train") {

@@ -125,12 +125,12 @@ Status CifarOp::ReadCifar10BlockData() {
}

std::ifstream in(file, std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file +
", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
in.is_open(), "Invalid cifar10 file, failed to open " + file + ", the file is damaged or permission denied.");

for (uint32_t index = 0; index < num_cifar10_records / kCifarBlockImageNum; ++index) {
(void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char));
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar10 file: " + file +
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid cifar10 file, failed to read data from: " + file +
", re-download dataset(make sure it is CIFAR-10 binary version).");
(void)cifar_raw_data_block_->EmplaceBack(image_data);
// Add file path info

@@ -155,7 +155,7 @@ Status CifarOp::ReadCifar100BlockData() {
// check the validity of the file path
Path file_path(file);
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to find cifar100 file: " + file);
"Invalid cifar100 file, " + file + " does not exist or is a directory.");
std::string file_name = file_path.Basename();

// if usage is train/test, get only these 2 files

@@ -167,16 +167,16 @@ Status CifarOp::ReadCifar100BlockData() {
} else if (file_name.find("train") != std::string::npos) {
num_cifar100_records = num_cifar100_train_records;
} else {
RETURN_STATUS_UNEXPECTED("Invalid file, Cifar100 train/test file not found in: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid cifar100 file, Cifar100 train/test file is missing in: " + file_name);
}

std::ifstream in(file, std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file +
", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
in.is_open(), "Invalid cifar100 file, failed to open " + file + ", the file is damaged or permission denied.");

for (uint32_t index = 0; index < num_cifar100_records / kCifarBlockImageNum; index++) {
(void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char));
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar100 file: " + file +
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid cifar100 file, failed to read data from: " + file +
", re-download dataset(make sure it is CIFAR-100 binary version).");
(void)cifar_raw_data_block_->EmplaceBack(image_data);
// Add file path info

@@ -200,10 +200,10 @@ Status CifarOp::GetCifarFiles() {
}
}
} else {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open directory: " + dir_path.ToString() +
", make sure file not damaged or permission denied.");
RETURN_STATUS_UNEXPECTED("Invalid directory, " + dir_path.ToString() + " is not a directory or permission denied.");
}
CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(), "Invalid file, no .bin files found under " + folder_path_);
CHECK_FAIL_RETURN_UNEXPECTED(!cifar_files_.empty(),
"Invalid cifar folder, cifar(.bin) files are missing under " + folder_path_);
std::sort(cifar_files_.begin(), cifar_files_.end());
return Status::OK();
}

@@ -306,9 +306,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,
constexpr int64_t num_cifar10_records = 10000;
for (auto &file : op->cifar_files_) {
Path file_path(file);
CHECK_FAIL_RETURN_UNEXPECTED(
file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to open cifar10 file: " + file + ", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid cifar10 file, " + file + " does not exist or is a directory.");
std::string file_name = file_path.Basename();

if (op->usage_ == "train") {

@@ -321,8 +320,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,

std::ifstream in(file, std::ios::binary);

CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar10 file: " + file +
", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
in.is_open(), "Invalid cifar10 file, failed to open " + file + ", the file is damaged or permission denied.");
*count = *count + num_cifar10_records;
}
return Status::OK();

@@ -334,9 +333,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,
Path file_path(file);
std::string file_name = file_path.Basename();

CHECK_FAIL_RETURN_UNEXPECTED(
file_path.Exists() && !file_path.IsDirectory(),
"Invalid file, failed to find cifar100 file: " + file + ", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Invalid cifar100 file, " + file + " does not exist or is a directory.");

if (op->usage_ == "train" && file_path.Basename().find("train") == std::string::npos) continue;
if (op->usage_ == "test" && file_path.Basename().find("test") == std::string::npos) continue;

@@ -347,8 +345,8 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,
num_cifar100_records += kCifar100RecordsPerTrainFile;
}
std::ifstream in(file, std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(in.is_open(), "Invalid file, failed to open cifar100 file: " + file +
", make sure file not damaged or permission denied.");
CHECK_FAIL_RETURN_UNEXPECTED(
in.is_open(), "Invalid cifar100 file, failed to open " + file + ", the file is damaged or permission denied.");
}
*count = num_cifar100_records;
return Status::OK();

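For reference, CIFAR-10 binary files store fixed-size records (one label byte plus a 32x32x3 image), which is why the loop above can read block-sized chunks and treat any stream failure as a truncated or wrong-format file. A minimal sketch of that open/read/validate loop with plain error strings in place of the CHECK_FAIL_RETURN_UNEXPECTED macro (the file name is illustrative):

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

int main() {
  const std::string file = "data_batch_1.bin";        // illustrative path
  constexpr size_t kRecordSize = 1 + 32 * 32 * 3;     // label byte + 3072 image bytes
  std::ifstream in(file, std::ios::binary);
  if (!in.is_open()) {
    std::cerr << "Invalid cifar10 file, failed to open " << file
              << ", the file is damaged or permission denied." << std::endl;
    return 1;
  }
  std::vector<unsigned char> record(kRecordSize);
  size_t count = 0;
  // Each successful fixed-size read is one complete record; a failed read
  // mid-record indicates truncation or the wrong dataset format.
  while (in.read(reinterpret_cast<char *>(record.data()), record.size())) {
    ++count;
  }
  std::cout << count << " records read from " << file << std::endl;
  return 0;
}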
@@ -56,7 +56,8 @@ Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
} else {
std::ifstream file_handle(data.second);
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + data.second);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + data.second +
", the json is damaged or permission denied.");
}
std::string contents((std::istreambuf_iterator<char>(file_handle)), std::istreambuf_iterator<char>());
nlohmann::json contents_js = nlohmann::json::parse(contents);

@@ -71,13 +72,15 @@ Status CityscapesOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.first;
std::string err =
"Invalid image, failed to decode " + data.first + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
if (task_ != taskSuffix) {
Status rc_t = Decode(task, &task);
if (rc_t.IsError()) {
std::string err_t = "Invalid data, failed to decode image: " + data.second;
std::string err_t =
"Invalid image, failed to decode " + data.second + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err_t);
}
}

@@ -106,8 +109,8 @@ void CityscapesOp::Print(std::ostream &out, bool show_all) const {
Status CityscapesOp::PrepareData() {
auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data());
if (!real_dataset_dir.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
MS_LOG(ERROR) << "Invalid file path, Cityscapes Dataset dir: " << dataset_dir_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, Cityscapes Dataset dir: " + dataset_dir_ + " does not exist.");
}

Path dataset_dir(real_dataset_dir.value());

@@ -143,15 +146,18 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con

Path images_dir_p(images_dir);
if (!images_dir_p.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + images_dir_p.ToString() + " is an invalid directory path.");
RETURN_STATUS_UNEXPECTED("Invalid path, Cityscapes Dataset image dir: " + images_dir_p.ToString() +
" is not a directory path.");
}
Path task_dir_p(task_dir);
if (!task_dir_p.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + task_dir_p.ToString() + " is an invalid directory path.");
RETURN_STATUS_UNEXPECTED("Invalid path, Cityscapes Dataset task dir: " + task_dir_p.ToString() +
" is not a directory path.");
}
std::shared_ptr<Path::DirIterator> d_it = Path::DirIterator::OpenDirectory(&images_dir_p);
if (d_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + images_dir_p.ToString());
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open Cityscapes Dataset image directory: " +
images_dir_p.ToString());
}

while (d_it->HasNext()) {

@@ -165,7 +171,8 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con
Path task_city_dir = task_dir_p / city_dir.Basename();
std::shared_ptr<Path::DirIterator> img_city_it = Path::DirIterator::OpenDirectory(&img_city_dir);
if (img_city_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + img_city_dir.ToString());
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open Cityscapes Dataset image city directory: " +
img_city_dir.ToString());
}

while (img_city_it->HasNext()) {

@@ -179,13 +186,15 @@ Status CityscapesOp::GetCityscapesDataByUsage(const std::string &images_dir, con
Path task_file_path = task_city_dir / (img_file_name.substr(0, img_file_name.find("_leftImg8bit")) + "_" +
GetTaskSuffix(task_, real_quality_mode));
if (!task_file_path.Exists()) {
RETURN_STATUS_UNEXPECTED("Invalid file, " + task_file_path.ToString() + " not found.");
RETURN_STATUS_UNEXPECTED("Invalid file, Cityscapes Dataset task file: " + task_file_path.ToString() +
" does not exist.");
}

image_task_map_[image_file_path.ToString()] = task_file_path.ToString();
}
} catch (const std::exception &err) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset: " + dataset_dir_);
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load Cityscapes Dataset from " + dataset_dir_ + ": " +
std::string(err.what()));
}
}

@@ -213,7 +222,9 @@ Status CityscapesOp::CountDatasetInfo() {
num_rows_ = static_cast<int64_t>(image_task_pairs_.size());
if (num_rows_ == 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, no valid data matching the dataset API CityscapesDataset. Please check file path or dataset API.");
"Invalid data, no valid data matching the dataset API 'CityscapesDataset'. Please check dataset API or file "
"path: " +
dataset_dir_ + ".");
}
return Status::OK();
}

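The pairing rule above maps an image such as aachen_000000_000019_leftImg8bit.png to its task file by swapping the "_leftImg8bit" tail for a task suffix, and the improved message fires when the derived file is missing. A small sketch of that derivation (the suffix value passed in main is illustrative):

#include <iostream>
#include <string>

// Derive the annotation file name from a Cityscapes image file name by
// replacing the "_leftImg8bit..." tail with the task suffix.
std::string TaskFileName(const std::string &img_file_name, const std::string &task_suffix) {
  size_t pos = img_file_name.find("_leftImg8bit");
  if (pos == std::string::npos) return "";  // caller reports "Invalid file" here
  return img_file_name.substr(0, pos) + "_" + task_suffix;
}

int main() {
  std::cout << TaskFileName("aachen_000000_000019_leftImg8bit.png", "gtFine_labelIds.png") << std::endl;
  // prints: aachen_000000_000019_gtFine_labelIds.png
}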
@@ -55,7 +55,7 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector<std::string> key_c
if (cursor.find(key_chain[i]) != cursor.end()) {
cursor = cursor[key_chain[i]];
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, in given JSON file, failed to find key: " + key_chain[i]);
RETURN_STATUS_UNEXPECTED("Invalid json file, in given JSON file, failed to find key: " + key_chain[i]);
}
}
std::string final_str = key_chain.back();

@@ -84,13 +84,13 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector<std::string> key_c
Status ClueOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file);
std::string err_msg = "Invalid file path, " + file + " does not exist.";
LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
}

std::ifstream handle(realpath.value());
if (!handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + file);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file + ", the file is damaged or permission denied.");
}

int64_t rows_total = 0;

@@ -115,7 +115,7 @@ Status ClueOp::LoadFile(const std::string &file, int64_t start_offset, int64_t e
js = nlohmann::json::parse(line);
} catch (const std::exception &err) {
// Catch any exception and convert to Status return code
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse JSON file: " + file);
RETURN_STATUS_UNEXPECTED("Invalid json, failed to parse " + file + ", " + std::string(err.what()));
}
int cols_count = cols_to_keyword_.size();
TensorRow t_row(cols_count, nullptr);

@@ -219,7 +219,7 @@ Status ClueOp::CalculateNumRowsPerShard() {
}
std::string file_list = ss.str();
RETURN_STATUS_UNEXPECTED(
"Invalid data, CLUEDataset API can't read the data file (interface mismatch or no data found). "
"Invalid data, 'CLUEDataset' API can't read the data file (interface mismatch or no data found). "
"Check file path:" +
file_list);
}

@@ -232,13 +232,13 @@ Status ClueOp::CalculateNumRowsPerShard() {
int64_t CountTotalRowsPerFile(const std::string &file) {
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << file;
MS_LOG(ERROR) << "Invalid file, " << file << " does not exist.";
return 0;
}

std::ifstream handle(realpath.value());
if (!handle.is_open()) {
MS_LOG(ERROR) << "Invalid file, failed to open file: " << file;
MS_LOG(ERROR) << "Invalid file, failed to open " << file << ": the file is damaged or permission denied.";
return 0;
}

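GetValue above walks a key chain through an nlohmann::json tree and fails with the exact key that broke the chain, which is what the improved message reports. A minimal self-contained sketch of the same lookup (the sample document and the bool-plus-message signature are illustrative):

#include <iostream>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

// Walk key_chain through js; on a missing key, report which key broke the chain.
// Assumes each intermediate node is a JSON object, as in the CLUE files.
bool GetValue(const nlohmann::json &js, const std::vector<std::string> &key_chain,
              nlohmann::json *out, std::string *err) {
  nlohmann::json cursor = js;
  for (const auto &key : key_chain) {
    if (cursor.find(key) == cursor.end()) {
      *err = "Invalid json file, in given JSON file, failed to find key: " + key;
      return false;
    }
    cursor = cursor[key];
  }
  *out = cursor;
  return true;
}

int main() {
  auto js = nlohmann::json::parse(R"({"label": {"desc": "news"}})");
  nlohmann::json value;
  std::string err;
  if (GetValue(js, {"label", "desc"}, &value, &err)) std::cout << value << std::endl;
}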
@@ -78,8 +78,8 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
std::shared_ptr<Tensor> image, coordinate;
auto itr = coordinate_map_.find(image_id);
if (itr == coordinate_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id +
" is missing from image node in annotation file: " + annotation_path_);
}

std::string kImageFile = image_folder_path_ + std::string("/") + image_id;

@@ -115,7 +115,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
} else if (task_type_ == TaskType::Panoptic) {
RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow));
} else {
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff or Panoptic.");
RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic.");
}

return Status::OK();

@@ -128,8 +128,8 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
std::vector<uint32_t> iscrowd_row;
auto itr_item = simple_item_map_.find(image_id);
if (itr_item == simple_item_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + image_id +
" is missing in the node of image from annotation file: " + annotation_path_ + ".");
}

std::vector<uint32_t> annotation = itr_item->second;

@@ -153,7 +153,7 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
std::string img_id;
size_t pos = image_id.find(".");
if (pos == std::string::npos) {
RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\"");
RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\"");
}
std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id));
std::shared_ptr<Tensor> filename;

@@ -171,8 +171,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
std::vector<uint32_t> item_queue;
auto itr_item = simple_item_map_.find(image_id);
if (itr_item == simple_item_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id +
" is missing in the node of 'image' from annotation file: " + annotation_path_);
}

item_queue = itr_item->second;

@@ -186,7 +186,7 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
std::string img_id;
size_t pos = image_id.find(".");
if (pos == std::string::npos) {
RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\"");
RETURN_STATUS_UNEXPECTED("Invalid image, 'image_id': " + image_id + " should be with suffix like \".jpg\"");
}
std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id));
std::shared_ptr<Tensor> filename;

@@ -206,8 +206,8 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
std::vector<uint32_t> area_row;
auto itr_item = simple_item_map_.find(image_id);
if (itr_item == simple_item_map_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + image_id +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid image_id, the attribute of 'image_id': " + image_id +
" is missing in the node of 'image' from annotation file: " + annotation_path_);
}

std::vector<uint32_t> annotation = itr_item->second;

@@ -237,7 +237,7 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
std::string img_id;
size_t pos = image_id.find(".");
if (pos == std::string::npos) {
RETURN_STATUS_UNEXPECTED("Invalid data, image : " + image_id + ", should be with suffix like \".jpg\"");
RETURN_STATUS_UNEXPECTED("Invalid image, " + image_id + " should be with suffix like \".jpg\"");
}
std::copy(image_id.begin(), image_id.begin() + pos, std::back_inserter(img_id));
std::shared_ptr<Tensor> filename;

@@ -252,7 +252,9 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
template <typename T>
Status CocoOp::SearchNodeInJson(const nlohmann::json &input_tree, std::string node_name, T *output_node) {
auto node = input_tree.find(node_name);
CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid data, required node not found in JSON: " + node_name);
CHECK_FAIL_RETURN_UNEXPECTED(node != input_tree.end(), "Invalid annotation, the attribute of '" + node_name +
"' is missing in annotation file: " + annotation_path_ +
".");
(*output_node) = *node;
return Status::OK();
}

@@ -262,17 +264,19 @@ Status CocoOp::PrepareData() {
try {
auto realpath = FileUtils::GetRealPath(annotation_path_.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << annotation_path_;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + annotation_path_);
std::string err_msg = "Invalid file path, Coco Dataset annotation file: " + annotation_path_ + " does not exist.";
LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg);
}

std::ifstream in(realpath.value());
if (!in.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open annotation file: " + annotation_path_);
RETURN_STATUS_UNEXPECTED("Invalid annotation file, Coco Dataset annotation file: " + annotation_path_ +
" open failed, permission denied!");
}
in >> js;
} catch (const std::exception &err) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open JSON file: " + annotation_path_ + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation file, Coco Dataset annotation file:" + annotation_path_ +
" load failed, error description: " + std::string(err.what()));
}

std::vector<std::string> image_que;

@@ -292,8 +296,8 @@ Status CocoOp::PrepareData() {
RETURN_IF_NOT_OK(SearchNodeInJson(annotation, std::string(kJsonAnnoImageId), &image_id));
auto itr_file = image_index_.find(image_id);
if (itr_file == image_index_.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, image_id: " + std::to_string(image_id) +
" in annotation node is not found in image node in JSON file.");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'image_id': " + std::to_string(image_id) +
" is missing in the node of 'image' from annotation file: " + annotation_path_);
}
file_name = itr_file->second;
switch (task_type_) {

@@ -313,7 +317,7 @@ Status CocoOp::PrepareData() {
RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id));
break;
default:
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic.");
RETURN_STATUS_UNEXPECTED("Invalid task, task type should be Detection, Stuff, Keypoint or Panoptic.");
}
}
for (auto img : image_que) {

@@ -322,7 +326,7 @@ Status CocoOp::PrepareData() {
num_rows_ = image_ids_.size();
if (num_rows_ == 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, CocoDataset API can't read the data file (interface mismatch or no data found). "
"Invalid data, 'CocoDataset' API can't read the data file (interface mismatch or no data found). "
"Check file in directory: " +
image_folder_path_ + ".");
}

@@ -331,7 +335,8 @@ Status CocoOp::PrepareData() {

Status CocoOp::ImageColumnLoad(const nlohmann::json &image_tree, std::vector<std::string> *image_vec) {
if (image_tree.size() == 0) {
RETURN_STATUS_UNEXPECTED("Invalid data, no \"image\" node found in JSON file: " + annotation_path_ + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'image' node is missing in annotation file: " + annotation_path_ +
".");
}
for (auto img : image_tree) {
std::string file_name;

@@ -354,8 +359,8 @@ Status CocoOp::DetectionColumnLoad(const nlohmann::json &annotation_tree, const
RETURN_IF_NOT_OK(SearchNodeInJson(annotation_tree, std::string(kJsonAnnoCategoryId), &category_id));
auto search_category = category_set_.find(category_id);
if (search_category == category_set_.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'category_id': " + std::to_string(category_id) +
" is missing in the node of 'categories' from annotation file: " + annotation_path_);
auto node_iscrowd = annotation_tree.find(kJsonAnnoIscrowd);
if (node_iscrowd != annotation_tree.end()) iscrowd = *node_iscrowd;
bbox.insert(bbox.end(), node_bbox.begin(), node_bbox.end());

@@ -392,13 +397,13 @@ Status CocoOp::KeypointColumnLoad(const nlohmann::json &annotation_tree, const s
const int32_t &unique_id) {
auto itr_num_keypoint = annotation_tree.find(kJsonAnnoNumKeypoints);
if (itr_num_keypoint == annotation_tree.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no num_keypoint found in annotation file where image_id: " + std::to_string(unique_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'num_keypoint' node is missing in annotation file: " +
annotation_path_ + " where 'image_id': " + std::to_string(unique_id) + ".");
simple_item_map_[image_file].push_back(*itr_num_keypoint);
auto itr_keypoint = annotation_tree.find(kJsonAnnoKeypoints);
if (itr_keypoint == annotation_tree.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no keypoint found in annotation file where image_id: " + std::to_string(unique_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'keypoint' node is missing in annotation file: " +
annotation_path_ + " where 'image_id': " + std::to_string(unique_id) + ".");
coordinate_map_[image_file].push_back(*itr_keypoint);
return Status::OK();
}

@@ -407,31 +412,34 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s
const int32_t &image_id) {
auto itr_segments = annotation_tree.find(kJsonAnnoSegmentsInfo);
if (itr_segments == annotation_tree.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no segments_info found in annotation file where image_id: " + std::to_string(image_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the 'segments_info' node is missing in annotation file: " +
annotation_path_ + " where 'image_id': " + std::to_string(image_id) + ".");
for (auto info : *itr_segments) {
std::vector<float> bbox;
uint32_t category_id = 0;
auto itr_bbox = info.find(kJsonAnnoBbox);
if (itr_bbox == info.end())
RETURN_STATUS_UNEXPECTED("Invalid data, no bbox found in segments_info(in annotation file) where image_id: " +
std::to_string(image_id) + ".");
RETURN_STATUS_UNEXPECTED(
"Invalid annotation, the 'bbox' attribute is missing in the node of 'segments_info' where 'image_id': " +
std::to_string(image_id) + " from annotation file: " + annotation_path_ + ".");
bbox.insert(bbox.end(), itr_bbox->begin(), itr_bbox->end());
coordinate_map_[image_file].push_back(bbox);

RETURN_IF_NOT_OK(SearchNodeInJson(info, std::string(kJsonAnnoCategoryId), &category_id));
auto search_category = category_set_.find(category_id);
if (search_category == category_set_.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, category_id can't find in categories where category_id: " + std::to_string(category_id) + ".");
RETURN_STATUS_UNEXPECTED("Invalid annotation, the attribute of 'category_id': " + std::to_string(category_id) +
" is missing in the node of 'categories' from " + annotation_path_ + ".");
auto itr_iscrowd = info.find(kJsonAnnoIscrowd);
if (itr_iscrowd == info.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no iscrowd found in segments_info where image_id: " + std::to_string(image_id) + ".");
"Invalid annotation, the attribute of 'iscrowd' is missing in the node of 'segments_info' where 'image_id': " +
std::to_string(image_id) + " from annotation file: " + annotation_path_ + ".");
auto itr_area = info.find(kJsonAnnoArea);
if (itr_area == info.end())
RETURN_STATUS_UNEXPECTED(
"Invalid data, no area found in segments_info where image_id: " + std::to_string(image_id) + ".");
"Invalid annotation, the attribute of 'area' is missing in the node of 'segments_info' where 'image_id': " +
std::to_string(image_id) + " from annotation file: " + annotation_path_ + ".");
simple_item_map_[image_file].push_back(category_id);
simple_item_map_[image_file].push_back(*itr_iscrowd);
simple_item_map_[image_file].push_back(*itr_area);

@@ -441,7 +449,8 @@ Status CocoOp::PanopticColumnLoad(const nlohmann::json &annotation_tree, const s

Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {
if (categories_tree.size() == 0) {
RETURN_STATUS_UNEXPECTED("Invalid data, no categories found in annotation_path: " + annotation_path_);
RETURN_STATUS_UNEXPECTED(
"Invalid annotation, the 'categories' node is missing in annotation file: " + annotation_path_ + ".");
}
for (auto category : categories_tree) {
int32_t id = 0;

@@ -449,7 +458,9 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {
std::vector<int32_t> label_info;
auto itr_id = category.find(kJsonId);
if (itr_id == category.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, no JSON id found in categories of " + annotation_path_);
RETURN_STATUS_UNEXPECTED(
"Invalid annotation, the attribute of 'id' is missing in the node of 'categories' from annotation file: " +
annotation_path_);
}
id = *itr_id;
label_info.push_back(id);

@@ -458,13 +469,16 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {
auto itr_name = category.find(kJsonCategoriesName);
CHECK_FAIL_RETURN_UNEXPECTED(
itr_name != category.end(),
"Invalid data, no categories name found in categories where id: " + std::to_string(id));
"Invalid annotation, the attribute of 'name' is missing in the node of 'categories' where 'id': " +
std::to_string(id));
name = *itr_name;

if (task_type_ == TaskType::Panoptic) {
auto itr_isthing = category.find(kJsonCategoriesIsthing);
CHECK_FAIL_RETURN_UNEXPECTED(itr_isthing != category.end(),
"Invalid data, nothing found in categories of " + annotation_path_);
"Invalid annotation, the attribute of 'isthing' is missing in the node of "
"'categories' from annotation file: " +
annotation_path_);
label_info.push_back(*itr_isthing);
}
label_index_.emplace_back(std::make_pair(name, label_info));

@@ -477,7 +491,8 @@ Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &c

if (decode_ == true) {
Status rc = Decode(*tensor, tensor);
CHECK_FAIL_RETURN_UNEXPECTED(rc.IsOk(), "Invalid data, failed to decode image: " + path);
CHECK_FAIL_RETURN_UNEXPECTED(
rc.IsOk(), "Invalid image, failed to decode " + path + ": the image is broken or permission denied.");
}
return Status::OK();
}

@@ -505,8 +520,8 @@ Status CocoOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<i
RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
if ((*output_class_indexing).empty()) {
if ((task_type_ != TaskType::Detection) && (task_type_ != TaskType::Panoptic)) {
MS_LOG(ERROR) << "Invalid parameter, GetClassIndex only valid in \"Detection\" and \"Panoptic\" task.";
RETURN_STATUS_UNEXPECTED("Invalid parameter, GetClassIndex only valid in \"Detection\" and \"Panoptic\" task.");
MS_LOG(ERROR) << "Invalid task, only 'Detection' and 'Panoptic' task support GetClassIndex.";
RETURN_STATUS_UNEXPECTED("Invalid task, only 'Detection' and 'Panoptic' task support GetClassIndex.");
}
RETURN_IF_NOT_OK(PrepareData());
for (const auto &label : label_index_) {

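SearchNodeInJson is the workhorse behind most of the "attribute ... is missing in annotation file" messages in the COCO hunks. A hedged sketch of its shape outside the MindSpore Status machinery (error reporting simplified to a bool plus message; the sample annotation is illustrative):

#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

// Look up node_name in tree; on failure, name both the attribute and the
// file so the user can locate the broken annotation.
template <typename T>
bool SearchNodeInJson(const nlohmann::json &tree, const std::string &node_name,
                      const std::string &annotation_path, T *out, std::string *err) {
  auto node = tree.find(node_name);
  if (node == tree.end()) {
    *err = "Invalid annotation, the attribute of '" + node_name +
           "' is missing in annotation file: " + annotation_path + ".";
    return false;
  }
  *out = node->get<T>();
  return true;
}

int main() {
  auto annotation = nlohmann::json::parse(R"({"image_id": 42})");
  int image_id = 0;
  std::string err;
  if (SearchNodeInJson(annotation, "image_id", "instances.json", &image_id, &err))
    std::cout << image_id << std::endl;
}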
@@ -111,7 +111,11 @@ int CsvOp::CsvParser::PutRecord(int c) {
std::string s = std::string(str_buf_.begin(), str_buf_.begin() + pos_);
std::shared_ptr<Tensor> t;
if (cur_col_ >= column_default_.size()) {
err_message_ = "Number of file columns does not match the default records";
std::stringstream ss;
ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', "
<< "but got the size of column_names: " << cur_col_
<< ", the size of column_defaults : " << column_default_.size() << ".";
err_message_ = ss.str();
return -1;
}
Status rc;

@@ -139,7 +143,11 @@ int CsvOp::CsvParser::PutRecord(int c) {
break;
}
if (cur_col_ >= cur_row_.size()) {
err_message_ = "Number of file columns does not match the tensor table";
std::stringstream ss;
ss << "Invalid columns, the size of column_names should be greater than or equal to the size of columns of "
<< "loading data, but got the size of column_names: " << cur_col_
<< ", the size of columns in original loaded dataset: " << column_default_.size() << ".";
err_message_ = ss.str();
return -1;
}
cur_row_[cur_col_] = std::move(t);

@@ -166,7 +174,11 @@ int CsvOp::CsvParser::PutRow(int c) {
}

if (cur_col_ != column_default_.size()) {
err_message_ = "The number of columns does not match the definition.";
std::stringstream ss;
ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', "
<< "but got the size of column_names: " << cur_col_
<< ", the size of 'column_defaults': " << column_default_.size() << ".";
err_message_ = ss.str();
return -1;
}

@@ -201,11 +213,11 @@ int CsvOp::CsvParser::EndFile(int c) {

int CsvOp::CsvParser::CatchException(int c) {
if (GetMessage(c) == Message::MS_QUOTE && cur_state_ == State::UNQUOTE) {
err_message_ = "Invalid quote in unquote field.";
err_message_ = "Invalid csv file, unexpected quote in unquote field from " + file_path_ + ".";
} else if (GetMessage(c) == Message::MS_END_OF_FILE && cur_state_ == State::QUOTE) {
err_message_ = "Reach the end of file in quote field.";
err_message_ = "Invalid csv file, reach the end of file in quote field, check " + file_path_ + ".";
} else if (GetMessage(c) == Message::MS_NORMAL && cur_state_ == State::SECOND_QUOTE) {
err_message_ = "Receive unquote char in quote field.";
err_message_ = "Invalid csv file, receive unquote char in quote field, check " + file_path_ + ".";
}
return -1;
}

@@ -459,14 +471,14 @@ Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t en

auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << file;
RETURN_STATUS_UNEXPECTED("Invalid file, " + DatasetName() + " file get real path failed, path=" + file);
MS_LOG(ERROR) << "Invalid file path, " << file << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file + " does not exist.");
}

std::ifstream ifs;
ifs.open(realpath.value(), std::ifstream::in);
if (!ifs.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + " file: " + file);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file + ", the file is damaged or permission denied.");
}
if (column_name_list_.empty()) {
std::string tmp;

@@ -483,17 +495,18 @@ Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t en
if (err != 0) {
// if error code is -2, the returned error is interrupted
if (err == -2) return Status(kMDInterrupted);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse file: " + file + ": line " +
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse csv file: " + file + " at line " +
std::to_string(csv_parser.GetTotalRows() + 1) +
". Error message: " + csv_parser.GetErrorMessage());
}
}
} catch (std::invalid_argument &ia) {
std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1);
RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ": line " + err_row + ", type does not match.");
RETURN_STATUS_UNEXPECTED("Invalid csv, csv file: " + file + " parse failed at line " + err_row +
", type does not match.");
} catch (std::out_of_range &oor) {
std::string err_row = std::to_string(csv_parser.GetTotalRows() + 1);
RETURN_STATUS_UNEXPECTED("Invalid data, " + file + ": line " + err_row + ", value out of range.");
RETURN_STATUS_UNEXPECTED("Invalid csv, " + file + " parse failed at line " + err_row + " : value out of range.");
}
return Status::OK();
}

@@ -594,13 +607,14 @@ int64_t CsvOp::CountTotalRows(const std::string &file) {
CsvParser csv_parser(0, jagged_rows_connector_.get(), field_delim_, column_default_list_, file);
Status rc = csv_parser.InitCsvParser();
if (rc.IsError()) {
MS_LOG(ERROR) << "[Internal ERROR], failed to initialize " + DatasetName(true) + " Parser. Error:" << rc;
MS_LOG(ERROR) << "[Internal ERROR], failed to initialize " + DatasetName(true) + " Parser. Error description:"
<< rc;
return 0;
}

auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << file;
MS_LOG(ERROR) << "Invalid file path, csv file: " << file << " does not exist.";
return 0;
}

@@ -673,8 +687,8 @@ Status CsvOp::ComputeColMap() {

/* Process exception if ERROR in column name solving*/
if (!rc.IsOk()) {
MS_LOG(ERROR) << "Invalid file, fail to analyse column name map, path=" + csv_file;
RETURN_STATUS_UNEXPECTED("Invalid file, fail to analyse column name map, path=" + csv_file);
MS_LOG(ERROR) << "Invalid file, failed to get column name list from csv file: " + csv_file;
RETURN_STATUS_UNEXPECTED("Invalid file, failed to get column name list from csv file: " + csv_file);
}
}
} else {

@@ -689,9 +703,10 @@ Status CsvOp::ComputeColMap() {

if (column_default_list_.size() != column_name_id_map_.size()) {
RETURN_STATUS_UNEXPECTED(
"Invalid parameter, the number of column names does not match the default column, size of default column_list: " +
"Invalid parameter, the size of column_names should be equal to the size of 'column_defaults', but got "
" size of 'column_defaults': " +
std::to_string(column_default_list_.size()) +
", size of column_name: " + std::to_string(column_name_id_map_.size()));
", size of column_names: " + std::to_string(column_name_id_map_.size()));
}

return Status::OK();

@@ -703,7 +718,7 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) {
if (!check_flag_) {
auto realpath = FileUtils::GetRealPath(csv_file_name.data());
if (!realpath.has_value()) {
std::string err_msg = "Invalid file, " + DatasetName() + " file get real path failed, path=" + csv_file_name;
std::string err_msg = "Invalid file path, csv file: " + csv_file_name + " does not exist.";
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_UNEXPECTED(err_msg);
}

@@ -721,11 +736,9 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) {
if (column_name_id_map_.find(col_names[i]) == column_name_id_map_.end()) {
column_name_id_map_[col_names[i]] = i;
} else {
MS_LOG(ERROR) << "Invalid parameter, duplicate column names are not allowed: " + col_names[i] +
", The corresponding data files: " + csv_file_name;

RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " + col_names[i] +
", The corresponding data files: " + csv_file_name);
MS_LOG(ERROR) << "Invalid parameter, duplicate column " << col_names[i] << " for csv file: " << csv_file_name;
RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column " + col_names[i] +
" for csv file: " + csv_file_name);
}
}
check_flag_ = true;

@@ -736,11 +749,10 @@ Status CsvOp::ColMapAnalyse(const std::string &csv_file_name) {
if (column_name_id_map_.find(column_name_list_[i]) == column_name_id_map_.end()) {
column_name_id_map_[column_name_list_[i]] = i;
} else {
MS_LOG(ERROR) << "Invalid parameter, duplicate column names are not allowed: " + column_name_list_[i] +
", The corresponding data files: " + csv_file_name;

RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column names are not allowed: " +
column_name_list_[i] + ", The corresponding data files: " + csv_file_name);
MS_LOG(ERROR) << "Invalid parameter, duplicate column " << column_name_list_[i]
<< " for csv file: " << csv_file_name << ".";
RETURN_STATUS_UNEXPECTED("Invalid parameter, duplicate column " + column_name_list_[i] +
" for csv file: " + csv_file_name + ".");
}
}
check_flag_ = true;

@@ -764,7 +776,7 @@ bool CsvOp::ColumnNameValidate() {
for (auto &csv_file : csv_files_list_) {
auto realpath = FileUtils::GetRealPath(csv_file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " file get real path failed, path=" << csv_file;
MS_LOG(ERROR) << "Invalid file path, csv file: " << csv_file << " does not exist.";
return false;
}

@@ -781,9 +793,8 @@ bool CsvOp::ColumnNameValidate() {
match_file = csv_file;
} else { // Case the other files
if (col_names != record) {
MS_LOG(ERROR)
<< "Invalid parameter, every corresponding column name must be identical, either element or permutation. "
<< "Invalid files are: " + match_file + " and " + csv_file;
MS_LOG(ERROR) << "Invalid parameter, every column name should be equal the record from csv, but got column: "
<< col_names << ", csv record: " << record << ". Check " + match_file + " and " + csv_file + ".";
return false;
}
}

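The CSV parser hunks replace one-line messages with std::stringstream compositions that embed both the observed and the expected column counts. A small sketch of that message-building pattern (function name and the sample sizes are illustrative):

#include <iostream>
#include <sstream>
#include <string>

// Compose the column-mismatch diagnostic with both counts, as the patch does.
std::string ColumnMismatchMessage(size_t cur_col, size_t num_defaults) {
  std::stringstream ss;
  ss << "Invalid columns, the size of column_names should be less than the size of 'column_defaults', "
     << "but got the size of column_names: " << cur_col
     << ", the size of 'column_defaults': " << num_defaults << ".";
  return ss.str();
}

int main() { std::cout << ColumnMismatchMessage(5, 4) << std::endl; }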
@@ -76,13 +76,15 @@ Status DIV2KOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status hr_rc = Decode(hr_image, &hr_image);
if (hr_rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.first;
std::string err =
"Invalid image, failed to decode " + data.first + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}

Status lr_rc = Decode(lr_image, &lr_image);
if (lr_rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.second;
std::string err =
"Invalid image, failed to decode " + data.second + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
}

@@ -141,7 +143,7 @@ Status DIV2KOp::GetDIV2KLRDirRealName(const std::string &hr_dir_key, const std::
out_str += ("\t" + item.first + ": " + item.second + ",\n");
});
out_str += "\n}";
RETURN_STATUS_UNEXPECTED("Invalid param, " + lr_dir_key + " not found in DatasetPramMap: \n" + out_str);
RETURN_STATUS_UNEXPECTED("Invalid param, dir: " + lr_dir_key + " not found under div2k dataset dir, " + out_str);
}

if (downgrade_2017.find(downgrade_) != downgrade_2017.end() && scale_2017.find(scale_) != scale_2017.end()) {

@@ -158,8 +160,8 @@ Status DIV2KOp::GetDIV2KDataByUsage() {

auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data());
if (!real_dataset_dir.has_value()) {
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
MS_LOG(ERROR) << "Invalid file path, div2k dataset dir: " << dataset_dir_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, div2k dataset dir: " + dataset_dir_ + " does not exist.");
}

Path dataset_dir(real_dataset_dir.value());

@@ -167,14 +169,15 @@ Status DIV2KOp::GetDIV2KDataByUsage() {
Path lr_images_dir = dataset_dir / lr_dir_real_name_;

if (!hr_images_dir.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + hr_images_dir.ToString() + " is an invalid directory path.");
RETURN_STATUS_UNEXPECTED("Invalid path, div2k hr image dir: " + hr_images_dir.ToString() + " is not a directory.");
}
if (!lr_images_dir.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid path, " + lr_images_dir.ToString() + " is an invalid directory path.");
RETURN_STATUS_UNEXPECTED("Invalid path, div2k lr image dir: " + lr_images_dir.ToString() + " is not a directory.");
}
auto hr_it = Path::DirIterator::OpenDirectory(&hr_images_dir);
if (hr_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + hr_images_dir.ToString());
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open div2k hr image dir: " + hr_images_dir.ToString() +
", permission denied.");
}

std::string image_name;

@@ -202,12 +205,14 @@ Status DIV2KOp::GetDIV2KDataByUsage() {

Path lr_image_file_path(lr_image_file_path_);
if (!lr_image_file_path.Exists()) {
RETURN_STATUS_UNEXPECTED("Invalid file, " + lr_image_file_path.ToString() + " not found.");
RETURN_STATUS_UNEXPECTED("Invalid file, div2k image file: " + lr_image_file_path.ToString() +
" does not exist.");
}

image_hr_lr_map_[hr_image_file_path.ToString()] = lr_image_file_path.ToString();
} catch (const std::exception &err) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load DIV2K Dataset: " + dataset_dir_);
RETURN_STATUS_UNEXPECTED("Invalid path, failed to load DIV2K Dataset from " + dataset_dir_ + ": " +
std::string(err.what()));
}
}
for (auto item : image_hr_lr_map_) {

@@ -220,7 +225,8 @@ Status DIV2KOp::CountDatasetInfo() {
num_rows_ = static_cast<int64_t>(image_hr_lr_pairs_.size());
if (num_rows_ == 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid data, no valid data matching the dataset API DIV2KDataset. Please check file path or dataset API.");
"Invalid data, no valid data matching the dataset API 'DIV2KDataset'. Please check dataset API or file path: " +
dataset_dir_ + ".");
}
return Status::OK();
}

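The hr/lr pairing that these existence checks protect follows the usual DIV2K naming scheme, where a high-res file like 0001.png maps to a low-res counterpart like 0001x2.png under a scale directory. A minimal sketch of that derivation, assuming the standard bicubic layout (the helper name is illustrative):

#include <iostream>
#include <string>

// Map a DIV2K high-res file name to its low-res counterpart for a given
// scale, e.g. "0001.png" with scale 2 -> "0001x2.png".
std::string LowResName(const std::string &hr_name, int scale) {
  size_t dot = hr_name.rfind('.');
  if (dot == std::string::npos) return "";  // caller reports "Invalid file" here
  return hr_name.substr(0, dot) + "x" + std::to_string(scale) + hr_name.substr(dot);
}

int main() { std::cout << LowResName("0001.png", 2) << std::endl; }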
@@ -58,11 +58,12 @@ Status EMnistOp::WalkAllFiles() {
const std::string train_prefix = "-train";
const std::string test_prefix = "-test";
auto realpath = FileUtils::GetRealPath(folder_path_.data());
CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Get real path failed: " + folder_path_);
CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Invalid file path, " + folder_path_ + " does not exist.");
Path dir(realpath.value());
auto dir_it = Path::DirIterator::OpenDirectory(&dir);
if (dir_it == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open directory: " + dir.ToString());
RETURN_STATUS_UNEXPECTED("Invalid path, failed to open emnist dataset dir: " + dir.ToString() +
", the directory is not a directory or permission denied.");
}
std::string prefix;
prefix = "emnist-" + name_; // used to match usage == "all".

@@ -88,7 +89,9 @@ Status EMnistOp::WalkAllFiles() {
std::sort(image_names_.begin(), image_names_.end());
std::sort(label_names_.begin(), label_names_.end());
CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(),
"Invalid data, num of images is not equal to num of labels.");
"Invalid data, num of image files should be equal to num of label files under " +
realpath.value() + ", but got num of images: " + std::to_string(image_names_.size()) +
", num of labels: " + std::to_string(label_names_.size()) + ".");

return Status::OK();
}

@@ -118,12 +121,12 @@ Status EMnistOp::CountTotalRows(const std::string &dir, const std::string &name,
for (size_t i = 0; i < op->image_names_.size(); ++i) {
std::ifstream image_reader;
image_reader.open(op->image_names_[i], std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(),
"Invalid file, failed to open image file: " + op->image_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), "Invalid file, failed to open " + op->image_names_[i] +
": the image file is damaged or permission denied.");
std::ifstream label_reader;
label_reader.open(op->label_names_[i], std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(),
"Invalid file, failed to open label file: " + op->label_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), "Invalid file, failed to open " + op->label_names_[i] +
": the label file is damaged or permission denied.");
uint32_t num_images;
Status s = op->CheckImage(op->image_names_[i], &image_reader, &num_images);
image_reader.close();

@@ -134,8 +137,10 @@ Status EMnistOp::CountTotalRows(const std::string &dir, const std::string &name,
label_reader.close();
RETURN_IF_NOT_OK(s);

CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num of images is not equal to num of labels.");
CHECK_FAIL_RETURN_UNEXPECTED(
(num_images == num_labels),
"Invalid data, num of images should be equal to num of labels, but got num of images: " +
std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + ".");
*count = *count + num_images;
}

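The image/label totals compared above come from the MNIST-style idx headers: a big-endian magic number (2051 for image files, 2049 for label files) followed by the record count. A standalone sketch of that header read, assuming the standard idx layout (the path in main is illustrative; the same pattern applies to the FashionMnist hunks below):

#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

// Read a 32-bit big-endian value from an idx file.
static uint32_t ReadBE32(std::ifstream &in) {
  unsigned char b[4] = {0, 0, 0, 0};
  in.read(reinterpret_cast<char *>(b), 4);
  return (uint32_t(b[0]) << 24) | (uint32_t(b[1]) << 16) | (uint32_t(b[2]) << 8) | uint32_t(b[3]);
}

int main() {
  const std::string path = "emnist-mnist-train-images-idx3-ubyte";  // illustrative
  std::ifstream in(path, std::ios::binary);
  if (!in.is_open()) {
    std::cerr << "Invalid file, failed to open " << path
              << ": the image file is damaged or permission denied." << std::endl;
    return 1;
  }
  uint32_t magic = ReadBE32(in);  // 2051 for image files, 2049 for label files
  uint32_t count = ReadBE32(in);  // record count, compared against the label file
  std::cout << "magic=" << magic << " count=" << count << std::endl;
  return 0;
}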
@@ -90,7 +90,8 @@ void FakeImageOp::Print(std::ostream &out, bool show_all) const {
Status FakeImageOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
if (cls_ids == nullptr || !cls_ids->empty() || label_list_.empty()) {
if (label_list_.empty()) {
RETURN_STATUS_UNEXPECTED("No image found in dataset. Check if image was generated successfully.");
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] No image found in dataset. Check if image was generated successfully.");
} else {
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Map for storing image-index pair is nullptr or has been set in other place, "

@@ -126,7 +127,7 @@ Status FakeImageOp::PrepareData() {

label_list_.shrink_to_fit();
num_rows_ = label_list_.size();
CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "Generate image failed, please check dataset API.");
CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0, "Invalid data, generate fake data failed, please check dataset API.");
image_tensor_.clear();
image_tensor_.resize(num_rows_);
return Status::OK();

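CHECK_FAIL_RETURN_UNEXPECTED, used throughout this patch, turns a failed predicate into an early error return carrying the message. A rough stand-in showing the shape of such a macro (a simplified sketch, not the MindSpore definition; the Status stand-in is an assumption):

#include <iostream>
#include <string>

// Simplified stand-in for a Status type: empty string means OK.
using Status = std::string;

// If the condition fails, return an error Status built from the message.
#define CHECK_OR_RETURN(condition, message)   \
  do {                                        \
    if (!(condition)) return Status(message); \
  } while (false)

Status PrepareData(size_t num_rows) {
  CHECK_OR_RETURN(num_rows > 0, "Invalid data, generate fake data failed, please check dataset API.");
  return Status();  // OK
}

int main() {
  Status s = PrepareData(0);
  if (!s.empty()) std::cerr << s << std::endl;
}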
@@ -56,12 +56,12 @@ Status FashionMnistOp::CountTotalRows(const std::string &dir, const std::string
for (size_t i = 0; i < op->image_names_.size(); ++i) {
std::ifstream image_reader;
image_reader.open(op->image_names_[i], std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(),
"Invalid file, failed to open image file: " + op->image_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader.is_open(), "Invalid file, failed to open " + op->image_names_[i] +
": the image file is damaged or permission denied.");
std::ifstream label_reader;
label_reader.open(op->label_names_[i], std::ios::binary);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(),
"Invalid file, failed to open label file: " + op->label_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader.is_open(), "Invalid file, failed to open " + op->label_names_[i] +
": the label file is damaged or permission denied.");
uint32_t num_images;
Status s = op->CheckImage(op->image_names_[i], &image_reader, &num_images);
image_reader.close();

@@ -72,8 +72,10 @@ Status FashionMnistOp::CountTotalRows(const std::string &dir, const std::string
label_reader.close();
RETURN_IF_NOT_OK(s);

CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num of images is not equal to num of labels.");
CHECK_FAIL_RETURN_UNEXPECTED(
(num_images == num_labels),
"Invalid data, num of images should be equal to num of labels, but got num of images: " +
std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + ".");
*count = *count + num_images;
}

@ -48,7 +48,8 @@ Status FlickrOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
|
|||
if (decode_ == true) {
|
||||
Status rc = Decode(image, &image);
|
||||
if (rc.IsError()) {
|
||||
std::string err = "Invalid data, failed to decode image: " + data.first;
|
||||
std::string err =
|
||||
"Invalid image, failed to decode " + data.first + ": the image is damaged or permission denied!";
|
||||
RETURN_STATUS_UNEXPECTED(err);
|
||||
}
|
||||
}
|
||||
|
@ -76,13 +77,14 @@ void FlickrOp::Print(std::ostream &out, bool show_all) const {
|
|||
Status FlickrOp::PrepareData() {
|
||||
auto real_file_path = FileUtils::GetRealPath(file_path_.data());
|
||||
if (!real_file_path.has_value()) {
|
||||
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_path_;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_path_);
MS_LOG(ERROR) << "Invalid file path, " << file_path_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_path_ + " does not exist.");
}

std::ifstream file_handle(real_file_path.value());
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_);
RETURN_STATUS_UNEXPECTED("Invalid annotation file, failed to open " + file_path_ +
": the file is damaged or permission denied.");
}

std::string line;

@@ -102,16 +104,16 @@ Status FlickrOp::PrepareData() {
image_name = line.substr(0, flag_idx - 2);  // -2 because "#[0-4]\t"
if (image_name.empty()) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, image_name is not found in Flickr annotation file: " + file_path_ +
"; line: " + line);
RETURN_STATUS_UNEXPECTED("Invalid file, the attribute of image_name is missing in Flickr dataset file: " +
file_path_ + ", line: " + line);
}

image_file_path = (dataset_dir / image_name).ToString();
std::string annotation = line.substr(flag_idx + 1);
if (annotation.empty()) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, annotation is not found in Flickr annotation file: " + file_path_ +
"; line: " + line);
RETURN_STATUS_UNEXPECTED("Invalid file, the attribute of annotation is missing in Flickr dataset file: " +
file_path_ + ", line: " + line);
}

bool valid = false;

@@ -127,7 +129,8 @@ Status FlickrOp::PrepareData() {
image_annotation_map_[image_file_path].emplace_back(annotation);
} catch (const std::exception &err) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse Flickr dataset file: " + file_path_ + ": " +
std::string(err.what()));
}
}

@@ -146,8 +149,8 @@ Status FlickrOp::PrepareData() {
Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) {
auto real_file_name = FileUtils::GetRealPath(file_name.data());
if (!real_file_name.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name);
MS_LOG(ERROR) << "Invalid file path, Flickr dataset file: " << file_name << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, Flickr dataset file: " + file_name + " does not exist.");
}

std::ifstream file_handle;

@@ -155,14 +158,16 @@ Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) {
*valid = false;
file_handle.open(real_file_name.value(), std::ios::binary | std::ios::in);
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid Flickr file, failed to open " + file_name +
": the file is damaged or permission denied.");
}
unsigned char file_type[read_num];
(void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);

if (file_handle.fail()) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid Flickr file, failed to read " + file_name +
": the file is damaged or the file content is incomplete.");
}
file_handle.close();
if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {
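Note: CheckImageType above decides whether a file is a JPEG by probing its first three bytes against the 0xff 0xd8 0xff signature. A minimal standalone sketch of that probe, outside the MindSpore Status machinery (IsJpeg and its error-string wording are illustrative, not MindSpore APIs; only the signature itself comes from the code above):

// Standalone sketch: returns true if the first bytes of `file_name` match the
// JPEG signature; on I/O failure, fills `err` with a message naming the file
// and the likely cause, mirroring the improved messages above.
#include <fstream>
#include <string>

bool IsJpeg(const std::string &file_name, std::string *err) {
  std::ifstream handle(file_name, std::ios::binary | std::ios::in);
  if (!handle.is_open()) {
    *err = "Invalid file, failed to open " + file_name + ": the file is damaged or permission denied.";
    return false;
  }
  unsigned char file_type[3] = {0};
  handle.read(reinterpret_cast<char *>(file_type), sizeof(file_type));
  if (handle.fail()) {
    *err = "Invalid file, failed to read " + file_name + ": the file content is incomplete.";
    return false;
  }
  return file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff;
}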
@@ -64,7 +64,7 @@ Status GeneratorOp::CreateGeneratorObject() {
// Acquire Python GIL
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized.");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized.");
}
try {
py::array sample_ids;

@@ -93,15 +93,17 @@ Status GeneratorOp::Init() {
Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row) {
if (!py::isinstance<py::tuple>(py_data)) {
return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__,
"Invalid data, Generator should return a tuple of NumPy arrays, currently returned is not a tuple.");
"Invalid python function, the 'source' of 'GeneratorDataset' should return a tuple of NumPy arrays, "
"but got " +
std::string(py_data.get_type().str()));
}
py::tuple py_row = py_data.cast<py::tuple>();
// Check if returned number of columns matches with column names
if (py_row.size() != column_names_.size()) {
return Status(
StatusCode::kMDPyFuncException, __LINE__, __FILE__,
"Invalid data, Generator should return same number of NumPy arrays as specified in column_names, the size of"
" column_names is:" +
"Invalid python function, the 'source' of 'GeneratorDataset' should return same number of NumPy arrays as "
"specified in column_names, the size of column_names is:" +
std::to_string(column_names_.size()) +
" and number of returned NumPy array is:" + std::to_string(py_row.size()));
}

@@ -110,15 +112,18 @@ Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row)
py::object ret_py_ele = py_row[i];
if (!py::isinstance<py::array>(ret_py_ele)) {
return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__,
"Invalid data, Generator should return a tuple of NumPy arrays. Ensure each item in tuple that "
"returned by source function of GeneratorDataset be NumPy array.");
"Invalid python function, 'GeneratorDataset' should return a tuple of NumPy arrays, but got " +
std::string(ret_py_ele.get_type().str()));
}
std::shared_ptr<Tensor> tensor;
RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast<py::array>(), &tensor));
if ((!column_types_.empty()) && (column_types_[i] != DataType::DE_UNKNOWN) &&
(column_types_[i] != tensor->type())) {
return Status(StatusCode::kMDPyFuncException, __LINE__, __FILE__,
"Invalid data, type of returned data in GeneratorDataset is not same with specified column_types.");
"Invalid python function, type of returned data in 'GeneratorDataset' should be same with "
"specified column_types, but the type of returned data: " +
std::string(ret_py_ele.get_type().str()) +
", specified column type: " + column_types_[i].ToString());
}
tensor_row->push_back(tensor);
}

@@ -173,7 +178,7 @@ Status GeneratorOp::operator()() {
{
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
#ifndef ENABLE_SECURITY
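Note: the PyRowToTensorRow checks above share one pattern: verify a Python object's type before casting, and put the actual received type name into the message. A small sketch of the same pattern in plain pybind11 (ValidateRow is a hypothetical helper, not a MindSpore API; it assumes an active interpreter, e.g. under py::scoped_interpreter):

// Validates that a generator row is a tuple of NumPy arrays and reports the
// offending type by name, mirroring the improved messages above.
#include <pybind11/embed.h>
#include <pybind11/numpy.h>
#include <stdexcept>
#include <string>

namespace py = pybind11;

void ValidateRow(const py::object &row) {
  if (!py::isinstance<py::tuple>(row)) {
    throw std::runtime_error("source should return a tuple of NumPy arrays, but got " +
                             std::string(row.get_type().str()));
  }
  for (const auto &item : row.cast<py::tuple>()) {
    if (!py::isinstance<py::array>(item)) {
      throw std::runtime_error("each item in the tuple should be a NumPy array, but got " +
                               std::string(item.get_type().str()));
    }
  }
}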
@@ -93,7 +93,8 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + folder_path_ + (pair_ptr->first);
std::string err = "Invalid image, " + folder_path_ + (pair_ptr->first) +
" decode failed, the image is broken or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
}

@@ -121,7 +122,7 @@ void ImageFolderOp::Print(std::ostream &out, bool show_all) const {
Status ImageFolderOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) {
if (image_label_pairs_.empty()) {
RETURN_STATUS_UNEXPECTED("Invalid data, " + DatasetName(true) +
RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + DatasetName(true) +
"Dataset API can't read the data file (interface mismatch or no data found). Check " +
DatasetName() + " file path: " + folder_path_);
} else {

@@ -156,7 +157,7 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) {
Path folder(folder_path_ + folder_name);
std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
if (folder.Exists() == false || dirItr == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + folder_name);
RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + folder_name + " does not exist or permission denied.");
}
std::set<std::string> imgs;  // use this for ordering
while (dirItr->HasNext()) {

@@ -193,8 +194,8 @@ Status ImageFolderOp::RecursiveWalkFolder(Path *dir) {
RETURN_IF_NOT_OK(folder_name_queue_->EmplaceBack(subdir.ToString().substr(dirname_offset_)));
}
if (recursive_ == true) {
MS_LOG(ERROR) << "RecursiveWalkFolder(&subdir) functionality is disabled permanently. No recursive walk of "
<< "directory will be performed.";
MS_LOG(ERROR) << "[Internal ERROR] RecursiveWalkFolder(&subdir) functionality is disabled permanently. "
<< "No recursive walk of directory will be performed.";
}
}
}

@@ -206,7 +207,7 @@ Status ImageFolderOp::StartAsyncWalk() {
TaskManager::FindMe()->Post();
Path dir(folder_path_);
if (dir.Exists() == false || dir.IsDirectory() == false) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + folder_path_);
RETURN_STATUS_UNEXPECTED("Invalid dataset_dir, " + folder_path_ + " may not exist or the path is not a directory.");
}
dirname_offset_ = folder_path_.length();
RETURN_IF_NOT_OK(RecursiveWalkFolder(&dir));

@@ -242,10 +243,9 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
std::string err_msg = "";
int64_t row_cnt = 0;
err_msg += (dir.Exists() == false || dir.IsDirectory() == false)
? "Invalid parameter, input path is invalid or not set, path: " + path
? "Invalid dataset_dir, " + path + " does not exist or the path is not a directory. "
: "";
err_msg +=
(num_classes == nullptr && num_rows == nullptr) ? "Invalid parameter, num_class and num_rows are null.\n" : "";
err_msg += (num_classes == nullptr && num_rows == nullptr) ? "[Internal ERROR] num_class and num_rows are null." : "";
if (err_msg.empty() == false) {
RETURN_STATUS_UNEXPECTED(err_msg);
}

@@ -266,7 +266,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
} else {
for (const auto &p : class_index) {
CHECK_FAIL_RETURN_UNEXPECTED(folder_names.find(p.first) != folder_names.end(),
"Invalid parameter, folder: " + p.first + " doesn't exist in " + path + " .");
"Invalid subdirectory, class: " + p.first + " doesn't exist in " + path + ".");
}
(*num_classes) = class_index.size();
}

@@ -277,7 +277,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
Path subdir(folder_paths.front());
dir_itr = Path::DirIterator::OpenDirectory(&subdir);
if (subdir.Exists() == false || dir_itr == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.ToString());
RETURN_STATUS_UNEXPECTED("Invalid subdirectory, ImageFolder Dataset subdirectory: " + subdir.ToString() +
" does not exist or permission denied");
}
while (dir_itr->HasNext()) {
if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) {
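Note: CountRowsAndClasses above accumulates every failed precondition into one err_msg and fails once, so the caller sees all problems together instead of fixing them one at a time. A standalone sketch of that accumulate-then-fail idiom (the function name, std::filesystem usage, and messages are illustrative, not taken from the source):

// Collects all precondition failures into a single message; an empty return
// value means every check passed.
#include <filesystem>
#include <string>

std::string ValidateDatasetDir(const std::string &path, const int *num_classes, const int *num_rows) {
  namespace fs = std::filesystem;
  std::string err_msg;
  if (!fs::exists(path) || !fs::is_directory(path)) {
    err_msg += "Invalid dataset_dir, " + path + " does not exist or the path is not a directory. ";
  }
  if (num_classes == nullptr && num_rows == nullptr) {
    err_msg += "[Internal ERROR] num_classes and num_rows are null.";
  }
  return err_msg;
}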
@@ -64,7 +64,7 @@ FilenameBlock::FilenameBlock(IOBlockFlags io_block_flags)
// Gets the filename from the block using the provided index container
Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj<std::string> &index) const {
if (out_filename == nullptr) {
RETURN_STATUS_UNEXPECTED("Failed to get filename from FilenameBlock.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to get filename from FilenameBlock.");
}

// a FilenameBlock only has one key. Call base class method to fetch that key

@@ -77,7 +77,7 @@ Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj<
auto &it = r.first;
*out_filename = it.value();
} else {
RETURN_STATUS_UNEXPECTED("Could not find filename from index.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Could not find filename from index.");
}

return Status::OK();
@@ -38,15 +38,15 @@ LJSpeechOp::LJSpeechOp(const std::string &file_dir, int32_t num_workers, int32_t
Status LJSpeechOp::PrepareData() {
auto real_path = FileUtils::GetRealPath(folder_path_.data());
if (!real_path.has_value()) {
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + folder_path_);
RETURN_STATUS_UNEXPECTED("Invalid file path, LJSpeech Dataset folder: " + folder_path_ + " does not exist.");
}
Path root_folder(real_path.value());
Path metadata_file_path = root_folder / "metadata.csv";
CHECK_FAIL_RETURN_UNEXPECTED(metadata_file_path.Exists() && !metadata_file_path.IsDirectory(),
"Invalid file, failed to find metadata file: " + metadata_file_path.ToString());
"Invalid file, failed to find LJSpeech metadata file: " + metadata_file_path.ToString());
std::ifstream csv_reader(metadata_file_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(csv_reader.is_open(),
"Invalid file, failed to open metadata file: " + metadata_file_path.ToString() +
"Invalid file, failed to open LJSpeech metadata file: " + metadata_file_path.ToString() +
", make sure the file is not damaged or permission denied.");
std::string line = "";
while (getline(csv_reader, line)) {

@@ -64,8 +64,8 @@ Status LJSpeechOp::PrepareData() {
}
if (meta_info_list_.empty()) {
csv_reader.close();
RETURN_STATUS_UNEXPECTED(
"Reading failed, unable to read valid data from the metadata file: " + metadata_file_path.ToString() + ".");
RETURN_STATUS_UNEXPECTED("Reading failed, unable to read valid data from the LJSpeech metadata file: " +
metadata_file_path.ToString() + ".");
}
num_rows_ = meta_info_list_.size();
csv_reader.close();

@@ -76,7 +76,7 @@ Status LJSpeechOp::PrepareData() {
// 1 function call produces 1 TensorRow
Status LJSpeechOp::LoadTensorRow(row_id_type index, TensorRow *trow) {
int32_t num_items = meta_info_list_.size();
CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < num_items, "The input index is out of range.");
CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < num_items, "[Internal ERROR] The input index is out of range.");
std::shared_ptr<Tensor> waveform;
std::shared_ptr<Tensor> sample_rate_scalar;
std::shared_ptr<Tensor> transcription, normalized_transcription;

@@ -118,7 +118,7 @@ void LJSpeechOp::Print(std::ostream &out, bool show_all) const {
Status LJSpeechOp::CountTotalRows(const std::string &dir, int64_t *count) {
auto real_path = FileUtils::GetRealPath(dir.data());
if (!real_path.has_value()) {
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + dir);
RETURN_STATUS_UNEXPECTED("Invalid file, " + dir + " does not exist.");
}
Path root_folder(real_path.value());
Path metadata_file_path = root_folder / "metadata.csv";
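Note: PrepareData above only errors after the read loop, when no valid metadata row was collected at all. A sketch of that read-then-verify shape for a metadata file (the pipe-delimited layout is an assumption about LJSpeech's metadata.csv; ReadMetadata and its messages are illustrative, not MindSpore APIs):

// Reads pipe-delimited rows and fails with a file-specific message when no
// valid row was found, mirroring the empty-list check above.
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

bool ReadMetadata(const std::string &csv_path, std::vector<std::vector<std::string>> *rows, std::string *err) {
  std::ifstream csv_reader(csv_path);
  if (!csv_reader.is_open()) {
    *err = "Invalid file, failed to open " + csv_path + ", make sure the file is not damaged or permission denied.";
    return false;
  }
  std::string line;
  while (std::getline(csv_reader, line)) {
    std::vector<std::string> fields;
    std::stringstream ss(line);
    std::string field;
    while (std::getline(ss, field, '|')) fields.push_back(field);  // assumed delimiter
    if (!fields.empty()) rows->push_back(std::move(fields));
  }
  if (rows->empty()) {
    *err = "Reading failed, unable to read valid data from the metadata file: " + csv_path + ".";
    return false;
  }
  return true;
}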
@@ -63,7 +63,8 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {
std::string err = "Invalid data, failed to decode image: " + data.first;
std::string err =
"Invalid image, failed to decode: " + data.first + ", the image is damaged or permission denied.";
RETURN_STATUS_UNEXPECTED(err);
}
}

@@ -91,7 +92,7 @@ void ManifestOp::Print(std::ostream &out, bool show_all) const {
Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) {
if (image_labelname_.empty()) {
RETURN_STATUS_UNEXPECTED("Invalid data, no image found in dataset.");
RETURN_STATUS_UNEXPECTED("Invalid manifest file, image data is missing in " + file_);
} else {
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Map for containing image-index pair is nullptr or has been set in other place,"

@@ -120,13 +121,14 @@ Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids)
Status ManifestOp::PrepareData() {
auto realpath = FileUtils::GetRealPath(file_.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_;
RETURN_STATUS_UNEXPECTED("Invalid data, get real path failed, path=" + file_);
MS_LOG(ERROR) << "Invalid file path, " << file_ << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_ + " does not exist.");
}

std::ifstream file_handle(realpath.value());
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Manifest file: " + file_);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + file_ +
": manifest file is damaged or permission denied!");
}
std::string line;
std::set<std::string> classes;

@@ -137,7 +139,7 @@ Status ManifestOp::PrepareData() {
std::string image_file_path = js.value("source", "");
if (image_file_path == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, 'source' is not found in Manifest file: " + file_ + " at line " +
RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'source' is missing in file: " + file_ + " at line " +
std::to_string(line_count));
}
// If image is not JPEG/PNG/GIF/BMP, drop it

@@ -149,7 +151,7 @@ Status ManifestOp::PrepareData() {
std::string usage = js.value("usage", "");
if (usage == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, 'usage' is not found in Manifest file: " + file_ + " at line " +
RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'usage' is missing in file: " + file_ + " at line " +
std::to_string(line_count));
}
(void)std::transform(usage.begin(), usage.end(), usage.begin(), ::tolower);

@@ -164,7 +166,7 @@ Status ManifestOp::PrepareData() {
classes.insert(label_name);
if (label_name == "") {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, 'name' of label is not found in Manifest file: " + file_ +
RETURN_STATUS_UNEXPECTED("Invalid manifest file, 'name' attribute of label is missing in file: " + file_ +
" at line " + std::to_string(line_count));
}
if (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) {

@@ -180,7 +182,8 @@ Status ManifestOp::PrepareData() {
line_count++;
} catch (const std::exception &err) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse manifest file: " + file_);
RETURN_STATUS_UNEXPECTED("Invalid manifest file, parse Manifest file: " + file_ + " failed, " +
std::string(err.what()));
}
}
num_classes_ = classes.size();

@@ -193,8 +196,8 @@ Status ManifestOp::PrepareData() {
Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) {
auto realpath = FileUtils::GetRealPath(file_name.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file_name;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + file_name);
MS_LOG(ERROR) << "Invalid file path, " << file_name << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file_name + " does not exist.");
}

std::ifstream file_handle;

@@ -202,14 +205,16 @@ Status ManifestOp::CheckImageType(const std::string &file_name, bool *valid) {
*valid = false;
file_handle.open(realpath.value(), std::ios::binary | std::ios::in);
if (!file_handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid image file, failed to open " + file_name +
": the image file is damaged or permission denied.");
}
unsigned char file_type[read_num];
(void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);

if (file_handle.fail()) {
file_handle.close();
RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name);
RETURN_STATUS_UNEXPECTED("Invalid image file, failed to read " + file_name +
": the image file is damaged or permission denied.");
}
file_handle.close();
if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {
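Note: the manifest parser above reads one JSON object per line and reports the file, the line number, and the missing attribute. A compact sketch of that loop with nlohmann::json, which the js.value("source", "") calls above suggest is in use (ParseManifest and its messages are a hypothetical helper, not a MindSpore API):

// Parses a manifest file line by line; every error names the file, the line
// number, and the missing attribute, as in the messages above.
#include <fstream>
#include <string>
#include <nlohmann/json.hpp>

bool ParseManifest(const std::string &file, std::string *err) {
  std::ifstream file_handle(file);
  if (!file_handle.is_open()) {
    *err = "Invalid file, failed to open " + file + ": manifest file is damaged or permission denied!";
    return false;
  }
  std::string line;
  int line_count = 1;
  while (std::getline(file_handle, line)) {
    try {
      nlohmann::json js = nlohmann::json::parse(line);
      if (js.value("source", "").empty()) {
        *err = "Invalid manifest file, 'source' is missing in file: " + file + " at line " + std::to_string(line_count);
        return false;
      }
    } catch (const std::exception &e) {
      *err = "Invalid manifest file, parse failed at line " + std::to_string(line_count) + ": " + e.what();
      return false;
    }
    line_count++;
  }
  return true;
}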
@@ -68,7 +68,8 @@ Status MindRecordOp::Init() {
data_schema_ = std::make_unique<DataSchema>();

std::vector<std::string> col_names = shard_reader_->GetShardColumn()->GetColumnName();
CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(), "Invalid data, no column names are specified.");
CHECK_FAIL_RETURN_UNEXPECTED(!col_names.empty(),
"Invalid column, no column names are specified, check mindrecord file.");
std::vector<mindrecord::ColumnDataType> col_data_types = shard_reader_->GetShardColumn()->GeColumnDataType();
std::vector<std::vector<int64_t>> col_shapes = shard_reader_->GetShardColumn()->GetColumnShape();

@@ -107,9 +108,8 @@ Status MindRecordOp::Init() {
if (!load_all_cols) {
std::unique_ptr<DataSchema> tmp_schema = std::make_unique<DataSchema>();
for (std::string colname : columns_to_load_) {
CHECK_FAIL_RETURN_UNEXPECTED(
colname_to_ind.find(colname) != colname_to_ind.end(),
"Invalid data, specified loading column name: " + colname + " does not exist in data file.");
CHECK_FAIL_RETURN_UNEXPECTED(colname_to_ind.find(colname) != colname_to_ind.end(),
"Invalid column, " + colname + " does not exist in data file.");
RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname])));
}
data_schema_ = std::move(tmp_schema);

@@ -177,7 +177,7 @@ Status MindRecordOp::WorkerEntry(int32_t worker_id) {
}
RETURN_IF_NOT_OK(worker_in_queues_[worker_id]->PopFront(&io_block));
}
RETURN_STATUS_UNEXPECTED("Unexpected nullptr received in worker.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Unexpected nullptr received in worker.");
}

Status MindRecordOp::GetRowFromReader(TensorRow *fetched_row, uint64_t row_id, int32_t worker_id) {

@@ -231,14 +231,15 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
RETURN_IF_NOT_OK(shard_column->GetColumnFromJson(column_name, sample_json_, &data_ptr, &n_bytes));
} else if (category == mindrecord::ColumnInBlob) {
CHECK_FAIL_RETURN_UNEXPECTED(sample_bytes_.find(column_name) != sample_bytes_.end(),
"Invalid data, failed to retrieve blob data from padding sample.");
"Invalid padded_sample, failed to retrieve blob data from padding sample, "
"check 'padded_sample'.");

std::string ss(sample_bytes_[column_name]);
n_bytes = ss.size();
data_ptr = std::make_unique<unsigned char[]>(n_bytes);
std::copy(ss.begin(), ss.end(), data_ptr.get());
} else {
RETURN_STATUS_UNEXPECTED("Invalid data, retrieved data type is unknown.");
RETURN_STATUS_UNEXPECTED("Invalid datatype, retrieved data type is unknown.");
}
if (data == nullptr) {
data = reinterpret_cast<const unsigned char *>(data_ptr.get());

@@ -254,7 +255,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
DataType type = column.Type();

// Set shape
CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0.");
CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0,
"[Internal ERROR] Found memory size of column data type is 0.");
auto num_elements = n_bytes / column_data_type_size;
if (type == DataType::DE_STRING) {
std::string s{data, data + n_bytes};
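Note: MindRecordOp::Init above verifies each requested column against the columns actually present in the file and names the offender instead of failing generically. A sketch of that lookup in isolation (names are illustrative; std::unordered_map stands in for colname_to_ind):

// Returns an empty string when all requested columns exist; otherwise a
// message naming the first missing column, as in the check above.
#include <string>
#include <unordered_map>
#include <vector>

std::string CheckColumnsExist(const std::vector<std::string> &columns_to_load,
                              const std::unordered_map<std::string, int> &colname_to_ind) {
  for (const auto &colname : columns_to_load) {
    if (colname_to_ind.find(colname) == colname_to_ind.end()) {
      return "Invalid column, " + colname + " does not exist in data file.";
    }
  }
  return "";
}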
@@ -128,7 +128,7 @@ class MindRecordOp : public MappableLeafOp {
const mindrecord::json &columns_json, const mindrecord::TaskType task_type);

Status LoadTensorRow(row_id_type row_id, TensorRow *row) override {
return Status(StatusCode::kMDSyntaxError, "Cannot call this method.");
return Status(StatusCode::kMDSyntaxError, "[Internal ERROR] Cannot call this method.");
}
// Private function for computing the assignment of the column name map.
// @return - Status
@@ -69,7 +69,7 @@ void MnistOp::Print(std::ostream &out, bool show_all) const {
Status MnistOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
if (cls_ids == nullptr || !cls_ids->empty() || image_label_pairs_.empty()) {
if (image_label_pairs_.empty()) {
RETURN_STATUS_UNEXPECTED("Invalid data, no image found in " + DatasetName() + " file.");
RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, image data is missing.");
} else {
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Map for containing image-index pair is nullptr or has been set in other place,"

@@ -89,7 +89,7 @@ Status MnistOp::ReadFromReader(std::ifstream *reader, uint32_t *result) {
uint32_t res = 0;
reader->read(reinterpret_cast<char *>(&res), 4);
CHECK_FAIL_RETURN_UNEXPECTED(!reader->fail(),
"Invalid data, failed to read 4 bytes from " + DatasetName() + " file.");
"Invalid file, failed to read 4 bytes from " + DatasetName() + " file.");
*result = SwapEndian(res);
return Status::OK();
}

@@ -100,17 +100,22 @@ uint32_t MnistOp::SwapEndian(uint32_t val) const {
}

Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_reader, uint32_t *num_images) {
CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(),
"Invalid file, failed to open " + DatasetName() + " image file: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(image_reader->is_open(), "Invalid " + DatasetName() + " file, failed to open " +
file_name + ": the file is damaged or permission denied.");
int64_t image_len = image_reader->seekg(0, std::ios::end).tellg();
(void)image_reader->seekg(0, std::ios::beg);
// The first 16 bytes of the image file are type, number, row and column
CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(image_len >= 16,
"Invalid " + DatasetName() + " file, the first data length of " + file_name +
" should be 16 bytes (contains type, number, row and column), but got " +
std::to_string(image_len) + ".");

uint32_t magic_number;
RETURN_IF_NOT_OK(ReadFromReader(image_reader, &magic_number));
CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistImageFileMagicNumber,
"Invalid file, this is not the " + DatasetName() + " image file: " + file_name);
"Invalid " + DatasetName() + " file, the magic number of " + file_name + " should be " +
std::to_string(kMnistImageFileMagicNumber) + ", but got " +
std::to_string(magic_number));

uint32_t num_items;
RETURN_IF_NOT_OK(ReadFromReader(image_reader, &num_items));

@@ -120,28 +125,38 @@ Status MnistOp::CheckImage(const std::string &file_name, std::ifstream *image_re
RETURN_IF_NOT_OK(ReadFromReader(image_reader, &cols));
// The image size of the Mnist dataset is fixed at [28,28]
CHECK_FAIL_RETURN_UNEXPECTED((rows == kMnistImageRows) && (cols == kMnistImageCols),
"Invalid data, shape of image is not equal to (28, 28).");
"Invalid " + DatasetName() + " file, shape of image in " + file_name +
" should be (28, 28), but got (" + std::to_string(rows) + ", " + std::to_string(cols) +
").");
CHECK_FAIL_RETURN_UNEXPECTED((image_len - 16) == num_items * rows * cols,
"Invalid data, got truncated data len: " + std::to_string(image_len - 16) +
", which is not equal to real data len: " + std::to_string(num_items * rows * cols));
"Invalid " + DatasetName() + " file, truncated data length of " + file_name +
" should be " + std::to_string(image_len - 16) + ", but got " +
std::to_string(num_items * rows * cols));
*num_images = num_items;
return Status::OK();
}

Status MnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_reader, uint32_t *num_labels) {
CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(),
"Invalid file, failed to open " + DatasetName() + " label file: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(), "Invalid " + DatasetName() + " file, failed to open " +
file_name + ": the file is damaged or permission denied!");
int64_t label_len = label_reader->seekg(0, std::ios::end).tellg();
(void)label_reader->seekg(0, std::ios::beg);
// The first 8 bytes of the label file are type and number
CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 8, "Invalid " + DatasetName() + " file, the first data length of " +
file_name + " should be 8 bytes (contains type and number), but got " +
std::to_string(label_len) + ".");
uint32_t magic_number;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number));
CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kMnistLabelFileMagicNumber,
"Invalid file, this is not the " + DatasetName() + " label file: " + file_name);
"Invalid " + DatasetName() + " file, the magic number of " + file_name +
" should be " + std::to_string(kMnistLabelFileMagicNumber) + ", but got " +
std::to_string(magic_number) + ".");
uint32_t num_items;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items));
CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid data, number of labels is wrong.");
CHECK_FAIL_RETURN_UNEXPECTED((label_len - 8) == num_items, "Invalid " + DatasetName() +
" file, the data length of labels in " + file_name +
" should be " + std::to_string(label_len - 8) +
", but got " + std::to_string(num_items) + ".");
*num_labels = num_items;
return Status::OK();
}

@@ -151,7 +166,10 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images));
RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels));
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num_images is not equal to num_labels. Ensure data file is not damaged.");
"Invalid " + DatasetName() + " file, the images number of " + image_names_[index] +
" should be equal to the labels number of " + label_names_[index] +
", but got images number: " + std::to_string(num_images) +
", labels number: " + std::to_string(num_labels) + ".");
// The image size of the Mnist dataset is fixed at [28,28]
int64_t size = kMnistImageRows * kMnistImageCols;
auto images_buf = std::make_unique<char[]>(size * num_images);

@@ -163,13 +181,13 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
}
(void)image_reader->read(images_buf.get(), size * num_images);
if (image_reader->fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " image: " + image_names_[index] +
", size:" + std::to_string(size * num_images) + ". Ensure data file is not damaged.");
RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, failed to read " + image_names_[index] +
": the file is damaged or permission denied!");
}
(void)label_reader->read(labels_buf.get(), num_images);
if (label_reader->fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " label:" + label_names_[index] +
", size: " + std::to_string(num_images) + ". Ensure data file is not damaged.");
RETURN_STATUS_UNEXPECTED("Invalid " + DatasetName() + " file, failed to read " + label_names_[index] +
": the file is damaged or the file content is incomplete.");
}
TensorShape img_tensor_shape = TensorShape({kMnistImageRows, kMnistImageCols, 1});
for (int64_t j = 0; j != num_images; ++j) {

@@ -244,8 +262,10 @@ Status MnistOp::WalkAllFiles() {
std::sort(image_names_.begin(), image_names_.end());
std::sort(label_names_.begin(), label_names_.end());

CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(),
"Invalid data, num of images is not equal to num of labels.");
CHECK_FAIL_RETURN_UNEXPECTED(
image_names_.size() == label_names_.size(),
"Invalid " + DatasetName() + " file, num of images should be equal to num of labels, but got num of images: " +
std::to_string(image_names_.size()) + ", num of labels: " + std::to_string(label_names_.size()) + ".");

return Status::OK();
}

@@ -279,7 +299,9 @@ Status MnistOp::CountTotalRows(const std::string &dir, const std::string &usage,
uint32_t num_labels;
RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels));
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num of images is not equal to num of labels.");
"Invalid " + op->DatasetName() +
" file, num of images should be equal to num of labels, but got num of images: " +
std::to_string(num_images) + ", num of labels: " + std::to_string(num_labels) + ".");
*count = *count + num_images;

// Close the readers
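Note: CheckImage and CheckLabel above both start by validating the big-endian IDX header before trusting any of its counts. A standalone sketch of that header check (2051 and 2049 are the standard IDX magic numbers for image and label files; CheckIdxHeader and its messages are illustrative, not MindSpore APIs):

// Reads the 4-byte big-endian magic number of an IDX file and verifies it,
// reporting expected vs. actual values as in the improved messages above.
#include <cstdint>
#include <fstream>
#include <string>

uint32_t SwapEndian(uint32_t v) {
  return ((v & 0xFF) << 24) | ((v & 0xFF00) << 8) | ((v >> 8) & 0xFF00) | (v >> 24);
}

bool CheckIdxHeader(const std::string &file_name, uint32_t expected_magic, std::string *err) {
  std::ifstream in(file_name, std::ios::binary);
  if (!in.is_open()) {
    *err = "failed to open " + file_name + ": the file is damaged or permission denied.";
    return false;
  }
  uint32_t magic = 0;
  in.read(reinterpret_cast<char *>(&magic), 4);
  if (in.fail()) {
    *err = "failed to read 4 bytes from " + file_name + ".";
    return false;
  }
  magic = SwapEndian(magic);  // IDX headers are stored big-endian
  if (magic != expected_magic) {
    *err = "magic number of " + file_name + " should be " + std::to_string(expected_magic) +
           ", but got " + std::to_string(magic) + ".";
    return false;
  }
  return true;
}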
@@ -215,7 +215,8 @@ bool NonMappableLeafOp::NeedPushFileToBlockQueue(const std::string &file_name, i
bool push = false;
int64_t start_index = device_id_ * num_rows_per_shard_;
if (device_id_ + 1 < 0) {
MS_LOG(ERROR) << "Device id is invalid, got " + std::to_string(device_id_);
MS_LOG(ERROR) << "Invalid device id, device id should be greater than or equal to 0, but got "
<< std::to_string(device_id_);
return false;
}

@@ -141,7 +141,8 @@ Status PhotoTourOp::GetFileContent(const std::string &info_file, std::string *an
RETURN_UNEXPECTED_IF_NULL(ans);
std::ifstream reader;
reader.open(info_file);
CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open PhotoTour info file: " + info_file);
CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open " + info_file +
": PhotoTour info file is damaged or permission denied.");
(void)reader.seekg(0, std::ios::end);
std::size_t size = reader.tellg();
(void)reader.seekg(0, std::ios::beg);

@@ -183,7 +184,9 @@ Status PhotoTourOp::ReadInfoFile(const std::string &data_dir, const std::string
switch (col_idx) {
case ID_3DPOINT: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour info file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, reading PhotoTour info file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
int id_3dpoint = std::atoi(item.c_str());
labels_.push_back(id_3dpoint);
col_idx = UNKNOWN;

@@ -191,7 +194,9 @@ Status PhotoTourOp::ReadInfoFile(const std::string &data_dir, const std::string
}
case UNKNOWN: {
std::string item2 = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), "Reading PhotoTour info file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(
!item2.empty(), "Invalid data, reading PhotoTour info file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content in file should not be empty.");
col_idx = ID_3DPOINT;
break;
}

@@ -225,34 +230,44 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri
switch (col_idx) {
case PATCH_ID1: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
patch_id1 = std::atoi(item.c_str());
col_idx = LABEL1;
break;
}
case LABEL1: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
label1 = std::atoi(item.c_str());
col_idx = UNUSED1;
break;
}
case UNUSED1: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
col_idx = PATCH_ID2;
break;
}
case PATCH_ID2: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
patch_id2 = std::atoi(item.c_str());
col_idx = LABEL2;
break;
}
case LABEL2: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
label2 = std::atoi(item.c_str());
col_idx = UNUSED2;
matches_.push_back(std::make_tuple(patch_id1, patch_id2, uint32_t(label1 == label2)));

@@ -260,13 +275,17 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri
}
case UNUSED2: {
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(),
"Invalid data, reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
col_idx = UNUSED3;
break;
}
case UNUSED3: {
std::string item2 = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(), "Reading PhotoTour matched file failed: " + info_file_path);
CHECK_FAIL_RETURN_UNEXPECTED(!item2.empty(),
"Invalid data, reading PhotoTour matched file failed: " + info_file_path +
" at line: " + std::to_string(pos) + ", the content should not be empty.");
col_idx = PATCH_ID1;
break;
}

@@ -281,8 +300,9 @@ Status PhotoTourOp::ReadMatchedFile(const std::string &data_dir, const std::stri

Status PhotoTourOp::GetPhotoTourDataTensor(uint32_t index, std::shared_ptr<Tensor> *image_tensor) {
RETURN_UNEXPECTED_IF_NULL(image_tensor);
CHECK_FAIL_RETURN_UNEXPECTED(index < kLens.at(name_),
"Index exceeds the maximum count of image, got: " + std::to_string(index));
CHECK_FAIL_RETURN_UNEXPECTED(
index < kLens.at(name_),
"[Internal ERROR] Index exceeds the maximum count of image, got: " + std::to_string(index));

int image_id = index / (kPatchNumPerRow * kPatchNumPerCol);
int row_in_image = (index % (kPatchNumPerRow * kPatchNumPerCol)) / kPatchNumPerRow;

@@ -320,7 +340,7 @@ Status PhotoTourOp::PrepareData() {
chosen_dataset_folder_path_ = (Path(dataset_dir_) / Path(name_)).ToString();
train_ = kTrain.at(usage_);
auto real_folder_path = FileUtils::GetRealPath(chosen_dataset_folder_path_.data());
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + chosen_dataset_folder_path_);
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(),
"Invalid file path, " + chosen_dataset_folder_path_ + " does not exist.");

std::vector<cv::String> file_names;
cv::glob(real_folder_path.value(), file_names);
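Note: ReadInfoFile and ReadMatchedFile above drive each space-delimited line through a small state machine, failing with the file and token position whenever a field comes back empty. A compact sketch of that split-and-validate step (NextToken is a hypothetical stand-in for get_splited_str, not a MindSpore API):

// Pops the next space-delimited token from `s`; an empty token is reported
// with the file and position, as in the checks above.
#include <string>

bool NextToken(std::string *s, const std::string &file, size_t *pos, std::string *token, std::string *err) {
  size_t space = s->find(' ');
  if (space == std::string::npos) space = s->size();
  *token = s->substr(0, space);
  s->erase(0, space == s->size() ? space : space + 1);
  *pos += 1;
  if (token->empty()) {
    *err = "Invalid data, reading " + file + " failed at token: " + std::to_string(*pos) +
           ", the content should not be empty.";
    return false;
  }
  return true;
}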
@@ -107,7 +107,8 @@ Status Places365Op::GetFileContent(const std::string &info_file, std::string *an
RETURN_UNEXPECTED_IF_NULL(ans);
std::ifstream reader;
reader.open(info_file);
CHECK_FAIL_RETURN_UNEXPECTED(!reader.fail(), "Invalid file, failed to open Places365 file: " + info_file);
CHECK_FAIL_RETURN_UNEXPECTED(
!reader.fail(), "Invalid file, failed to open " + info_file + ": Places365 file is damaged or permission denied.");
reader.seekg(0, std::ios::end);
std::size_t size = reader.tellg();
reader.seekg(0, std::ios::beg);

@@ -153,21 +154,21 @@ Status Places365Op::LoadCategories(const std::string &category_meta_name) {
while ((pos = s.find(" ")) != std::string::npos) {
switch (col_idx) {
case CATEGORY: {
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(),
"Reading places365 category file failed: " + category_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, reading places365 category file failed: " +
category_meta_name + ", space characters not found.");
category = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!category.empty(),
"Reading places365 category file failed: " + category_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(!category.empty(), "Invalid data, reading places365 category file failed: " +
category_meta_name + ", space characters not found.");
// switch the type of substring.
col_idx = LABEL;
break;
}
case LABEL: {
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(),
"Reading places365 category file failed: " + category_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, reading places365 category file failed: " +
category_meta_name + ", space characters not found.");
std::string label_item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!label_item.empty(),
"Reading places365 category file failed: " + category_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(!label_item.empty(), "Invalid data, reading places365 category file failed: " +
category_meta_name + ", space characters not found.");
label = std::atoi(label_item.c_str());
// switch the type of substring.
col_idx = CATEGORY;

@@ -204,19 +205,21 @@ Status Places365Op::LoadFileLists(const std::string &filelists_meta_name) {
while ((pos = s.find(" ")) != std::string::npos) {
switch (col_idx) {
case PATH: {
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(),
"Reading places365 category file failed: " + filelists_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, reading places365 filelist file failed: " +
filelists_meta_name + ", space characters not found.");
path = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!path.empty(), "Reading places365 filelist file failed: " + filelists_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(!path.empty(), "Invalid data, reading places365 filelist file failed: " +
filelists_meta_name + ", space characters not found.");
// switch the type of substring.
col_idx = LABEL;
break;
}
case LABEL: {
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(),
"Reading places365 category file failed: " + filelists_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(pos + 1 <= s.size(), "Invalid data, reading places365 filelist file failed: " +
filelists_meta_name + ", space characters not found.");
std::string item = get_splited_str(pos);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Reading places365 filelist file failed: " + filelists_meta_name);
CHECK_FAIL_RETURN_UNEXPECTED(!item.empty(), "Invalid data, reading places365 filelist file failed: " +
filelists_meta_name + ", space characters not found.");
label = std::atoi(item.c_str());
// switch the type of substring.
col_idx = PATH;

@@ -233,13 +236,15 @@ Status Places365Op::LoadFileLists(const std::string &filelists_meta_name) {

Status Places365Op::GetPlaces365DataTensor(uint32_t index, std::shared_ptr<Tensor> *image_tensor) {
std::string file_path = image_path_label_pairs_[index].first;
CHECK_FAIL_RETURN_UNEXPECTED(Path(file_path).Exists(), file_path + " File not exists.");
CHECK_FAIL_RETURN_UNEXPECTED(Path(file_path).Exists(),
"Invalid file path, Places365 image: " + file_path + " does not exist.");
RETURN_IF_NOT_OK(Tensor::CreateFromFile(file_path, image_tensor));
if (decode_) {
Status rc = Decode(*image_tensor, image_tensor);
if (rc.IsError()) {
*image_tensor = nullptr;
std::string err_msg = "Invalid data, failed to decode image: " + file_path;
std::string err_msg =
"Invalid image, failed to decode " + file_path + ": the image is damaged or permission denied.";
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
}

@@ -249,14 +254,15 @@ Status Places365Op::GetPlaces365DataTensor(uint32_t index, std::shared_ptr<Tenso

Status Places365Op::PrepareData() {
auto real_folder_path = FileUtils::GetRealPath(root_.data());
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + root_);
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Invalid file path, " + root_ + " does not exist.");

RETURN_IF_NOT_OK(LoadCategories((Path(real_folder_path.value()) / Path(kCategoriesMeta)).ToString()));
RETURN_IF_NOT_OK(LoadFileLists((Path(real_folder_path.value()) / Path(kFileListMeta.at(usage_))).ToString()));
num_rows_ = image_path_label_pairs_.size();
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0,
"Invalid data, no valid data matching the dataset API Places365Dataset. Please check file path or dataset API.");
"Invalid data, no valid data matching the dataset API Places365Dataset. Please check dataset API or file path: " +
root_ + ".");
return Status::OK();
}

@@ -281,7 +287,7 @@ Status Places365Op::CountTotalRows(const std::string &dir, const std::string &us

for (size_t i = 0; i < op->image_path_label_pairs_.size(); ++i) {
CHECK_FAIL_RETURN_UNEXPECTED(Path(op->image_path_label_pairs_[i].first).Exists(),
op->image_path_label_pairs_[i].first + " File not exists.");
"Invalid file path, " + op->image_path_label_pairs_[i].first + " does not exist.");
}
*count = op->image_path_label_pairs_.size();
return Status::OK();
@@ -111,7 +111,9 @@ Status QMnistOp::CountTotalRows(const std::string &dir, const std::string &usage
uint32_t num_labels;
RETURN_IF_NOT_OK(op->CheckLabel(op->label_names_[i], &label_reader, &num_labels));
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num of images is not equal to num of labels.");
"Invalid data, num of images should be equal to num of labels loading from " + dir +
", but got num of images: " + std::to_string(num_images) +
", num of labels: " + std::to_string(num_labels) + ".");

if (usage == "test10k") {
// only use the first 10k samples and drop the last 50k samples

@@ -141,7 +143,8 @@ Status QMnistOp::WalkAllFiles() {
const std::string nist_prefix = "xnist";

auto real_folder_path = FileUtils::GetRealPath(folder_path_.data());
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(), "Get real path failed: " + folder_path_);
CHECK_FAIL_RETURN_UNEXPECTED(real_folder_path.has_value(),
"Invalid QMnist folder, " + folder_path_ + " does not exist or permission denied!");
Path root_dir(real_folder_path.value());

if (usage_ == "train") {

@@ -162,20 +165,25 @@ Status QMnistOp::WalkAllFiles() {
label_names_.push_back((root_dir / Path(nist_prefix + "-" + label_ext)).ToString());
}

CHECK_FAIL_RETURN_UNEXPECTED(image_names_.size() == label_names_.size(),
"Invalid data, num of images is not equal to num of labels.");
CHECK_FAIL_RETURN_UNEXPECTED(
image_names_.size() == label_names_.size(),
"Invalid data, num of Qmnist image files should be equal to num of Qmnist label files under directory: " +
folder_path_ + ", but got num of image files: " + std::to_string(image_names_.size()) +
", num of label files: " + std::to_string(label_names_.size()) + ".");

for (size_t i = 0; i < image_names_.size(); i++) {
Path file_path(image_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Failed to find " + DatasetName() + " image file: " + file_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
file_path.Exists() && !file_path.IsDirectory(),
"Invalid file path, Qmnist data file: " + file_path.ToString() + " does not exist or is a directory.");
MS_LOG(INFO) << DatasetName(true) << " operator found image file at " << file_path.ToString() << ".";
}

for (size_t i = 0; i < label_names_.size(); i++) {
Path file_path(label_names_[i]);
CHECK_FAIL_RETURN_UNEXPECTED(file_path.Exists() && !file_path.IsDirectory(),
"Failed to find " + DatasetName() + " label file: " + file_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
file_path.Exists() && !file_path.IsDirectory(),
"Invalid file path, Qmnist data file: " + file_path.ToString() + " does not exist or is a directory.");
MS_LOG(INFO) << DatasetName(true) << " operator found label file at " << file_path.ToString() << ".";
}

@@ -189,7 +197,9 @@ Status QMnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *l
RETURN_IF_NOT_OK(CheckImage(image_names_[index], image_reader, &num_images));
RETURN_IF_NOT_OK(CheckLabel(label_names_[index], label_reader, &num_labels));
CHECK_FAIL_RETURN_UNEXPECTED((num_images == num_labels),
"Invalid data, num_images is not equal to num_labels. Ensure data file is not damaged.");
"Invalid data, num of images should be equal to num of labels loading from " +
folder_path_ + ", but got num of images: " + std::to_string(num_images) +
", num of labels: " + std::to_string(num_labels) + ".");

// The image size of the QMNIST dataset is fixed at [28,28]
int64_t image_size = kQMnistImageRows * kQMnistImageCols;

@@ -216,16 +226,16 @@ Status QMnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *l
}
(void)image_reader->read(images_buf.get(), image_size * num_images);
if (image_reader->fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " image: " + image_names_[index] +
", size:" + std::to_string(image_size * num_images) +
". Ensure data file is not damaged.");
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + std::to_string(image_size * num_images) +
" bytes from " + image_names_[index] +
": the data file is damaged or the content is incomplete.");
}
// uint32_t uses 4 bytes in memory
(void)label_reader->read(reinterpret_cast<char *>(labels_buf.get()), label_length * num_labels * 4);
if (label_reader->fail()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + DatasetName() + " label:" + label_names_[index] +
", size: " + std::to_string(label_length * num_labels) +
". Ensure data file is not damaged.");
RETURN_STATUS_UNEXPECTED("Invalid file, failed to read " + std::to_string(label_length * num_labels * 4) +
" bytes from " + label_names_[index] +
": the data file is damaged or the content is incomplete.");
}
TensorShape image_tensor_shape = TensorShape({kQMnistImageRows, kQMnistImageCols, 1});
TensorShape label_tensor_shape = TensorShape({kQMnistLabelLength});

@@ -258,23 +268,32 @@ Status QMnistOp::CheckLabel(const std::string &file_name, std::ifstream *label_r
RETURN_UNEXPECTED_IF_NULL(label_reader);
RETURN_UNEXPECTED_IF_NULL(num_labels);
CHECK_FAIL_RETURN_UNEXPECTED(label_reader->is_open(),
"Invalid file, failed to open " + DatasetName() + " label file: " + file_name);
"Invalid file, failed to open " + file_name + ": the label file is damaged or permission denied.");
int64_t label_len = label_reader->seekg(0, std::ios::end).tellg();
(void)label_reader->seekg(0, std::ios::beg);
// The first 12 bytes of the label file are type, number and length
CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 12, "Invalid file, " + DatasetName() + " file is corrupted: " + file_name);
CHECK_FAIL_RETURN_UNEXPECTED(label_len >= 12,
"Invalid file, load " + file_name +
" failed: the first 12 bytes of the label file should be type, number and length, " +
"but got the first read bytes: " + std::to_string(label_len));
uint32_t magic_number;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &magic_number));
CHECK_FAIL_RETURN_UNEXPECTED(magic_number == kQMnistLabelFileMagicNumber,
"Invalid file, this is not the " + DatasetName() + " label file: " + file_name);
"Invalid label file, the magic number of " + file_name + " should be " +
std::to_string(kQMnistLabelFileMagicNumber) + ", but got " +
std::to_string(magic_number) + ".");
uint32_t num_items;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &num_items));
uint32_t length;
RETURN_IF_NOT_OK(ReadFromReader(label_reader, &length));
CHECK_FAIL_RETURN_UNEXPECTED(length == kQMnistLabelLength, "Invalid data, length of labels is not equal to 8.");
CHECK_FAIL_RETURN_UNEXPECTED(length == kQMnistLabelLength, "Invalid data, length of every label loading from " +
file_name + " should be equal to 8, but got " +
std::to_string(length) + ".");

CHECK_FAIL_RETURN_UNEXPECTED((label_len - 12) == num_items * kQMnistLabelLength * 4,
"Invalid data, number of labels is wrong.");
"Invalid data, the total bytes of labels loading from Qmnist label file: " + file_name +
" should be " + std::to_string(label_len - 12) + ", but got " +
std::to_string(num_items * kQMnistLabelLength * 4) + ".");
*num_labels = num_items;
return Status::OK();
}
@@ -98,7 +98,7 @@ void RandomDataOp::GenerateSchema() {
std::make_unique<ColDescriptor>(col_name, DataType(newType), TensorImpl::kFlexible, rank, new_shape.get());

Status rc = data_schema_->AddColumn(*new_col);
if (rc.IsError()) MS_LOG(ERROR) << "Failed to generate a schema. Message:" << rc;
if (rc.IsError()) MS_LOG(ERROR) << "[Internal ERROR] Failed to generate a schema. Message:" << rc;
}
}

@@ -136,7 +136,8 @@ Status RandomDataOp::CreateRandomRow(TensorRow *new_row) {
buf = std::make_unique<unsigned char[]>(size_in_bytes);
int ret_code = memset_s(buf.get(), size_in_bytes, random_byte, size_in_bytes);
if (ret_code != 0) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor.");
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"[Internal ERROR] memset_s failed to set random bytes for a tensor.");
}

RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor));
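Note: CreateRandomRow above checks the return code of memset_s, the bounds-checked fill from the securec library, and surfaces a nonzero code as a Status. A sketch of the same contract using only standard C++ (CheckedFill is a stand-in that mirrors the error-code convention, not the securec API itself):

// Fills `buf` with `value`, refusing (like memset_s) when the requested count
// exceeds the destination size, and reporting the failure as a message.
#include <cstring>
#include <string>

int CheckedFill(unsigned char *buf, size_t buf_size, unsigned char value, size_t count, std::string *err) {
  if (buf == nullptr || count > buf_size) {
    *err = "[Internal ERROR] failed to set random bytes for a tensor.";
    return -1;  // nonzero mirrors the memset_s error-code convention
  }
  std::memset(buf, value, count);
  return 0;
}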
@@ -53,7 +53,7 @@ Status DistributedSamplerRT::InitSampler() {
CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0, "Invalid parameter, num_samples must be greater than 0, but got " +
std::to_string(num_samples_) + ".\n");
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n");
num_rows_ > 0, "[Internal ERROR] num_rows must be greater than 0, but got " + std::to_string(num_rows_) + ".\n");
CHECK_FAIL_RETURN_UNEXPECTED(
device_id_ < num_devices_ && device_id_ >= 0 && num_rows_ > 0 && num_samples_ > 0,
"Invalid parameter, num_shard must be greater than shard_id and greater than 0, got num_shard: " +

@@ -96,7 +96,7 @@ Status DistributedSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (cnt_ > samples_per_tensor_) {
RETURN_STATUS_UNEXPECTED(
"Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" +
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" +
std::to_string(cnt_) + ", samples_per_tensor(num_samples): " + std::to_string(samples_per_tensor_));
} else if (cnt_ == samples_per_tensor_ && (non_empty_ || !even_dist_)) {
(*out) = TensorRow(TensorRow::kFlagEOE);
@@ -29,7 +29,7 @@ Status MindRecordSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (next_id_ > num_samples_) {
RETURN_STATUS_UNEXPECTED(
"Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
std::to_string(next_id_) + ", num_samples_: " + std::to_string(num_samples_));
} else if (next_id_ == num_samples_) {
(*out) = TensorRow(TensorRow::kFlagEOE);

@@ -53,8 +53,8 @@ Status MindRecordSamplerRT::InitSampler() {
if (!sample_ids_) {
// Note, sample_ids_.empty() is okay and will just give no sample ids.
RETURN_STATUS_UNEXPECTED(
"Init Sampler failed as sample_ids is empty, here ShardReader did not provide a valid sample ids vector via"
" MindRecordSamplerRT");
"[Internal ERROR] Init Sampler failed as sample_ids is empty, here ShardReader did not provide a valid sample ids "
"vector via MindRecordSamplerRT.");
}

// Usually, the num samples is given from the user interface. In our case, that data is in mindrecord.
@ -44,7 +44,7 @@ Status PKSamplerRT::InitSampler() {
|
|||
// Compute that here for this case to find the total number of samples that are available to return.
|
||||
// (in this case, samples per class * total classes).
|
||||
if (samples_per_class_ > std::numeric_limits<int64_t>::max() / static_cast<int64_t>(labels_.size())) {
|
||||
RETURN_STATUS_UNEXPECTED("Overflow in counting num_rows");
|
||||
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Overflow in counting num_rows");
|
||||
}
|
||||
num_rows_ = samples_per_class_ * static_cast<int64_t>(labels_.size());
|
||||
|
||||
|
@ -72,7 +72,7 @@ Status PKSamplerRT::GetNextSample(TensorRow *out) {
|
|||
RETURN_UNEXPECTED_IF_NULL(out);
|
||||
if (next_id_ > num_samples_ || num_samples_ == 0) {
|
||||
RETURN_STATUS_UNEXPECTED(
|
||||
"Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
|
||||
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
|
||||
std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_));
|
||||
} else if (next_id_ == num_samples_) {
|
||||
(*out) = TensorRow(TensorRow::kFlagEOE);
|
||||
|
|
|
@ -39,7 +39,7 @@ Status PythonSamplerRT::GetNextSample(TensorRow *out) {
|
|||
{
|
||||
py::gil_scoped_acquire gil_acquire;
|
||||
if (Py_IsInitialized() == 0) {
|
||||
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
|
||||
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
|
||||
}
|
||||
try {
|
||||
py::object py_ret = py_sampler_instance.attr("_get_indices")();
|
||||
|
@ -57,7 +57,8 @@ Status PythonSamplerRT::GetNextSample(TensorRow *out) {
|
|||
return Status(StatusCode::kMDPyFuncException, e.what());
|
||||
} catch (const py::cast_error &e) {
|
||||
return Status(StatusCode::kMDPyFuncException,
|
||||
"Invalid data, python sampler iterator should return an integer index.");
|
||||
"Invalid data, Python sampler iterator should return an integer index, but error raised: " +
|
||||
std::string(e.what()));
|
||||
}
|
||||
}
|
||||
(*out) = {sample_ids};
|
||||
|
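Note: the cast error above fires when a user-defined sampler yields something other than an
integer index. A minimal conforming sampler, sketched under the assumption that ds.Sampler is
subclassed as in the standard user-defined sampler pattern (the fixed range is illustrative):

import mindspore.dataset as ds

class EvenIndexSampler(ds.Sampler):
    def __iter__(self):
        # yield integer indices only; yielding e.g. strings raises the cast error above
        for i in range(0, 10, 2):
            yield i
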
@@ -71,7 +72,7 @@ Status PythonSamplerRT::InitSampler() {
return Status::OK();
}
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0, "Invalid parameter, num_rows must be greater than 0, but got " + std::to_string(num_rows_));
num_rows_ > 0, "[Internal ERROR] num_rows must be greater than 0, but got " + std::to_string(num_rows_));
// Special value of 0 for num_samples means that the user wants to sample the entire set of data.
// If the user asked to sample more rows than exists in the dataset, adjust the num_samples accordingly.
if (num_samples_ == 0 || num_samples_ > num_rows_) {

@@ -80,12 +81,13 @@ Status PythonSamplerRT::InitSampler() {
{
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
py_sampler_instance.attr("_handshake")(num_rows_, num_samples_);
} catch (const py::error_already_set &e) {
return Status(StatusCode::kMDPyFuncException, e.what());
return Status(StatusCode::kMDPyFuncException,
"[Internal ERROR] Python sampler failed to execute _handshake: " + std::string(e.what()));
}
}

@@ -98,7 +100,7 @@ Status PythonSamplerRT::ResetSampler() {
need_to_reset_ = false;
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
py_sampler_instance.attr("reset")();

@@ -36,8 +36,9 @@ RandomSamplerRT::RandomSamplerRT(bool replacement, int64_t num_samples, bool res
Status RandomSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (next_id_ > num_samples_) {
RETURN_STATUS_UNEXPECTED("Sampler index must be less than or equal to num_samples(total rows in dataset), but got" +
std::to_string(next_id_) + ", num_samplers:" + std::to_string(num_samples_));
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got: " +
std::to_string(next_id_) + ", num_samples: " + std::to_string(num_samples_));
} else if (next_id_ == num_samples_) {
(*out) = TensorRow(TensorRow::kFlagEOE);
} else {

@@ -81,7 +82,7 @@ Status RandomSamplerRT::InitSampler() {
}
CHECK_FAIL_RETURN_UNEXPECTED(
num_samples_ > 0 && num_rows_ > 0,
"Invalid parameter, num_samples and num_rows must be greater than 0, but got num_samples: " +
"[Internal ERROR] num_samples and num_rows must be greater than 0, but got num_samples: " +
std::to_string(num_samples_) + ", num_rows: " + std::to_string(num_rows_));
samples_per_tensor_ = samples_per_tensor_ > num_samples_ ? num_samples_ : samples_per_tensor_;
rnd_.seed(seed_);

@@ -28,7 +28,7 @@ Status RandomAccessOp::GetNumRowsInDataset(int64_t *num) const {
// Here, it is just a getter method to return the value. However, it is invalid if there is
// not a value set for this count, so generate a failure if that is the case.
if (num == nullptr || num_rows_ == -1) {
RETURN_STATUS_UNEXPECTED("Get num rows in Dataset failed, num_rows has not been set yet.");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Get num rows in Dataset failed, num_rows has not been set yet.");
}
(*num) = num_rows_;
return Status::OK();

@@ -55,7 +55,7 @@ Status SamplerRT::HandshakeRandomAccessOp(const RandomAccessOp *op) {
RETURN_IF_NOT_OK(child_sampler->HandshakeRandomAccessOp(op));
}

CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "RandomAccessOp init failed, as it is nullptr.");
CHECK_FAIL_RETURN_UNEXPECTED(op != nullptr, "[Internal ERROR] RandomAccessOp init failed, as it is nullptr.");

// If there's a child sampler, set the row count to be its sample count
if (HasChildSampler()) {

@@ -114,7 +114,7 @@ Status SamplerRT::GetAllIdsThenReset(py::array *data) {
{
py::gil_scoped_acquire gil_acquire;
if (Py_IsInitialized() == 0) {
return Status(StatusCode::kMDPythonInterpreterFailure, "Python Interpreter is finalized");
return Status(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized");
}
try {
RETURN_IF_NOT_OK(sample_ids->GetDataAsNumpy(data));

@@ -127,7 +127,9 @@ Status SamplerRT::GetAllIdsThenReset(py::array *data) {
#endif

Status SamplerRT::SetNumSamples(int64_t num_samples) {
CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "Invalid parameter, num_samples must be greater than or equal to 0.");
CHECK_FAIL_RETURN_UNEXPECTED(
num_samples >= 0,
"Invalid parameter, 'num_samples' must be greater than or equal to 0, but got " + std::to_string(num_samples));
num_samples_ = num_samples;
return Status::OK();
}

@@ -161,13 +163,13 @@ Status SamplerRT::AddChild(std::shared_ptr<SamplerRT> child) {
// Only samplers can be added, not any other DatasetOp.
std::shared_ptr<SamplerRT> sampler = std::dynamic_pointer_cast<SamplerRT>(child);
if (!sampler) {
std::string err_msg("Cannot add child, child is not a sampler object.");
std::string err_msg("[Internal ERROR] Cannot add child, child is not a sampler object.");
RETURN_STATUS_UNEXPECTED(err_msg);
}

// Samplers can have at most 1 child.
if (!child_.empty()) {
std::string err_msg("Cannot add child sampler, this sampler already has a child.");
std::string err_msg("[Internal ERROR] Cannot add child sampler, this sampler already has a child.");
RETURN_STATUS_UNEXPECTED(err_msg);
}

@@ -42,7 +42,7 @@ class RandomAccessOp {
// @param std::map<int64_t, std::vector<int64_t>> * map
// @return Status The status code returned
virtual Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *map) const {
RETURN_STATUS_UNEXPECTED("GetClassIds needs to be override to support PK");
RETURN_STATUS_UNEXPECTED("[Internal ERROR] GetClassIds needs to be overridden to support PK.");
}

// default destructor

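Note: GetClassIds is the hook behind PK sampling, which is reached from Python via
ds.PKSampler. A hedged sketch (IMAGE_DIR is a hypothetical path to a class-per-folder dataset):

import mindspore.dataset as ds

sampler = ds.PKSampler(3)  # draw 3 samples per class
data = ds.ImageFolderDataset(IMAGE_DIR, sampler=sampler)
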
@@ -29,7 +29,7 @@ Status SequentialSamplerRT::GetNextSample(TensorRow *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (id_count_ > num_samples_) {
RETURN_STATUS_UNEXPECTED(
"Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" +
"[Internal ERROR] Sampler index must be less than or equal to num_samples(total rows in dataset), but got:" +
std::to_string(id_count_) + ", num_samples_: " + std::to_string(num_samples_));
} else if (id_count_ == num_samples_) {
(*out) = TensorRow(TensorRow::kFlagEOE);

@@ -41,10 +41,9 @@ Status WeightedRandomSamplerRT::InitSampler() {
if (num_samples_ == 0 || num_samples_ > num_rows_) {
num_samples_ = num_rows_;
}
CHECK_FAIL_RETURN_UNEXPECTED(
num_rows_ > 0 && num_samples_,
"Invalid parameter, num_samples and num_rows must be greater than 0, but got num_rows: " +
std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_));
CHECK_FAIL_RETURN_UNEXPECTED(num_rows_ > 0 && num_samples_,
"[Internal ERROR] num_samples and num_rows must be greater than 0, but got num_rows: " +
std::to_string(num_rows_) + ", num_samples: " + std::to_string(num_samples_));
CHECK_FAIL_RETURN_UNEXPECTED(samples_per_tensor_ > 0,
"Invalid parameter, samples_per_tensor(num_samples) must be greater than 0, but got " +
std::to_string(samples_per_tensor_) + ".\n");

@@ -160,8 +159,9 @@ Status WeightedRandomSamplerRT::GetNextSample(TensorRow *out) {
}

if (genId >= num_rows_) {
RETURN_STATUS_UNEXPECTED("Generated indice is out of bound, expect range [0, num_data-1], got indice: " +
std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1));
RETURN_STATUS_UNEXPECTED(
"[Internal ERROR] Generated index is out of bound, expect range [0, num_data-1], got index: " +
std::to_string(genId) + ", num_data: " + std::to_string(num_rows_ - 1));
}

if (HasChildSampler()) {

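Note: the num_rows/num_samples checks above mirror the Python-side constructor. A small
sketch, assuming a dataset with at least four rows (the weights are illustrative):

import mindspore.dataset as ds

# one weight per candidate row; num_samples must resolve to a value greater than 0
sampler = ds.WeightedRandomSampler(weights=[0.9, 0.05, 0.03, 0.02], num_samples=4, replacement=True)
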
@@ -76,7 +76,8 @@ Status SBUOp::ReadImageToTensor(const std::string &path, std::shared_ptr<Tensor>
if (decode_ == true) {
Status rc = Decode(*tensor, tensor);
if (rc.IsError()) {
RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path);
RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode image: " + path +
", the image is damaged or permission is denied.");
}
}
return Status::OK();

@@ -129,18 +130,21 @@ Status SBUOp::PrepareData() {
Path root_dir(real_folder_path.value());

url_path_ = root_dir / url_file_name;
CHECK_FAIL_RETURN_UNEXPECTED(url_path_.Exists() && !url_path_.IsDirectory(),
"Invalid file, failed to find SBU url file: " + url_path_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
url_path_.Exists() && !url_path_.IsDirectory(),
"Invalid file, SBU url file: " + url_path_.ToString() + " does not exist or is a directory.");
MS_LOG(INFO) << "SBU operator found url file " << url_path_.ToString() << ".";

caption_path_ = root_dir / caption_file_name;
CHECK_FAIL_RETURN_UNEXPECTED(caption_path_.Exists() && !caption_path_.IsDirectory(),
"Invalid file, failed to find SBU caption file: " + caption_path_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
caption_path_.Exists() && !caption_path_.IsDirectory(),
"Invalid file, SBU caption file: " + caption_path_.ToString() + " does not exist or is a directory.");
MS_LOG(INFO) << "SBU operator found caption file " << caption_path_.ToString() << ".";

image_folder_ = root_dir / image_folder_name;
CHECK_FAIL_RETURN_UNEXPECTED(image_folder_.Exists() && image_folder_.IsDirectory(),
"Invalid folder, failed to find SBU image folder: " + image_folder_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
image_folder_.Exists() && image_folder_.IsDirectory(),
"Invalid folder, SBU image folder: " + image_folder_.ToString() + " does not exist or is not a directory.");
MS_LOG(INFO) << "SBU operator found image folder " << image_folder_.ToString() << ".";

std::ifstream url_file_reader;

@@ -149,10 +153,11 @@ Status SBUOp::PrepareData() {
url_file_reader.open(url_path_.ToString(), std::ios::in);
caption_file_reader.open(caption_path_.ToString(), std::ios::in);

CHECK_FAIL_RETURN_UNEXPECTED(url_file_reader.is_open(),
"Invalid file, failed to open SBU url file: " + url_path_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(caption_file_reader.is_open(),
"Invalid file, failed to open SBU caption file: " + caption_path_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(url_file_reader.is_open(), "Invalid file, failed to open " + url_path_.ToString() +
": permission denied for the SBU url file.");
CHECK_FAIL_RETURN_UNEXPECTED(
caption_file_reader.is_open(),
"Invalid file, failed to open " + caption_path_.ToString() + ": permission denied for the SBU caption file.");

Status rc = GetAvailablePairs(url_file_reader, caption_file_reader);
url_file_reader.close();

@@ -172,8 +177,8 @@ Status SBUOp::GetAvailablePairs(std::ifstream &url_file_reader, std::ifstream &c
while (std::getline(url_file_reader, url_line) && std::getline(caption_file_reader, caption_line)) {
CHECK_FAIL_RETURN_UNEXPECTED(
(url_line.empty() && caption_line.empty()) || (!url_line.empty() && !caption_line.empty()),
"Invalid data, SBU url and caption file are mismatched: " + url_path_.ToString() + " and " +
caption_path_.ToString());
"Invalid data, SBU url file: " + url_path_.ToString() + " and caption file: " + caption_path_.ToString() +
" load empty data at line: " + std::to_string(line_num) + ".");
if (!url_line.empty() && !caption_line.empty()) {
line_num++;
RETURN_IF_NOT_OK(this->ParsePair(url_line, caption_line));

@@ -182,7 +187,8 @@ Status SBUOp::GetAvailablePairs(std::ifstream &url_file_reader, std::ifstream &c

image_caption_pairs_.shrink_to_fit();

CHECK_FAIL_RETURN_UNEXPECTED(image_caption_pairs_.size() > 0, "No valid images in " + image_folder_.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(image_caption_pairs_.size() > 0,
"Invalid data, no valid images in " + image_folder_.ToString() + ", check SBU dataset.");

// base field of RandomAccessOp
num_rows_ = image_caption_pairs_.size();

@@ -80,13 +80,14 @@ Status TextFileOp::LoadTensor(const std::string &line, TensorRow *out_row) {
Status TextFileOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, " + DatasetName() + " get real path failed, path=" << file;
RETURN_STATUS_UNEXPECTED("Invalid file, " + DatasetName() + " get real path failed, path=" + file);
MS_LOG(ERROR) << "Invalid file path, " << file << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + file + " does not exist.");
}

std::ifstream handle(realpath.value());
if (!handle.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open " + DatasetName() + ": " + file);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open text file: " + file +
", the file is damaged or permission is denied.");
}

int64_t rows_total = 0;

@@ -170,13 +171,13 @@ Status TextFileOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) {
int64_t CountTotalRows(const std::string &file) {
auto realpath = FileUtils::GetRealPath(file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << file;
MS_LOG(ERROR) << "Invalid file, " << file << " does not exist.";
return 0;
}

std::ifstream handle(realpath.value());
if (!handle.is_open()) {
MS_LOG(ERROR) << "Invalid file, failed to open file: " << file;
MS_LOG(ERROR) << "Invalid file, failed to open text file: " << file << ", the file is damaged or permission is denied.";
return 0;
}

@@ -44,7 +44,7 @@ const int64_t kTFRecordFileLimit = 0x140000000;
bool TFReaderOp::ValidateFirstRowCrc(const std::string &filename) {
auto realpath = FileUtils::GetRealPath(filename.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filename;
MS_LOG(ERROR) << "Invalid file path, " << filename << " does not exist.";
return false;
}

@@ -126,7 +126,7 @@ Status TFReaderOp::Init() {
}
if (total_rows_ < 0) {
RETURN_STATUS_UNEXPECTED(
"Invalid parameter, num_samples or num_rows for TFRecordDataset must be greater than 0, but got: " +
"[Internal ERROR] num_samples or num_rows for TFRecordDataset must be greater than 0, but got: " +
std::to_string(total_rows_));
}

@@ -267,14 +267,14 @@ Status TFReaderOp::FillIOBlockNoShuffle() {
Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
auto realpath = FileUtils::GetRealPath(filename.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filename;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + filename);
MS_LOG(ERROR) << "Invalid file path, " << filename << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + filename + " does not exist.");
}

std::ifstream reader;
reader.open(realpath.value());
if (!reader) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + filename);
RETURN_STATUS_UNEXPECTED("Invalid file, " + filename + " open failed: permission denied!");
}

int64_t rows_read = 0;

@@ -304,7 +304,7 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i
if (start_offset == kInvalidOffset || (rows_total >= start_offset && rows_total < end_offset)) {
dataengine::Example tf_file;
if (!tf_file.ParseFromString(serialized_example)) {
std::string errMsg = "Invalid file, failed to parse tfrecord file : " + filename;
std::string errMsg = "Failed to parse tfrecord file: " + filename + ", make sure protobuf version is suitable.";
MS_LOG(DEBUG) << errMsg + ", details of string: " << serialized_example;
RETURN_STATUS_UNEXPECTED(errMsg);
}

@@ -333,7 +333,8 @@ Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *ou
const google::protobuf::Map<std::string, dataengine::Feature> &feature_map = example_features.feature();
auto iter_column = feature_map.find(current_col.Name());
if (iter_column == feature_map.end()) {
RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist.");
RETURN_STATUS_UNEXPECTED("Invalid columns_list, column name: " + current_col.Name() +
" does not exist in tfrecord file, check tfrecord files.");
}
const dataengine::Feature &column_values_list = iter_column->second;
RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col));

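Note: this is the check exercised by test_tfrecord_invalid_columns further down. A minimal
sketch of the conforming call (FILES as in that test; the column names are assumptions about
the features present in the tfrecord file):

import mindspore.dataset as ds

# every name in columns_list must exist as a feature in the tfrecord file
data = ds.TFRecordDataset(FILES, columns_list=["image", "label"])
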
@@ -383,11 +384,13 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature
break;
}
case dataengine::Feature::KindCase::KIND_NOT_SET: {
std::string err_msg = "Invalid data, column type in tf record file must be uint8, int64 or float32.";
std::string err_msg =
"Unrecognized datatype, column type in tfrecord file must be uint8, int64 or float32, check tfrecord file.";
RETURN_STATUS_UNEXPECTED(err_msg);
}
default: {
std::string err_msg = "Invalid data, column type in tf record file must be uint8, int64 or float32.";
std::string err_msg =
"Unrecognized datatype, column type in tfrecord file must be uint8, int64 or float32, check tfrecord file.";
RETURN_STATUS_UNEXPECTED(err_msg);
}
}

@@ -404,8 +407,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
// Must be single byte type for each element!
if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 &&
current_col.Type() != DataType::DE_STRING) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be int8, uint8 or string, but got " + current_col.Type().ToString();
std::string err_msg = "Invalid column type, the column type of " + current_col.Name() +
" should be int8, uint8 or string, but got " + current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg);
}

@@ -439,7 +442,8 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
for (int i = 1; i < cur_shape.Size(); ++i) {
if (cur_shape[i] == TensorShape::kDimUnknown) {
std::string err_msg =
"Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name();
"Invalid data dimension, only one dimension shape is allowed to be -1, but the 0th and the " +
std::to_string(i) + "th dimension shape of " + current_col.Name() + " are both -1.";
RETURN_STATUS_UNEXPECTED(err_msg);
}
new_pad_size *= cur_shape[i];

@@ -447,10 +451,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
pad_size = new_pad_size;
} else {
if (cur_shape.known() && cur_shape.NumOfElements() != max_size) {
std::string err_msg = "Invalid data, shape in schema's column '" + current_col.Name() + "' is incorrect." +
"\nshape received: " + cur_shape.ToString() +
"\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) +
"\nexpected total elements in shape: " + std::to_string(max_size);
std::string err_msg = "Data dimensions of '" + current_col.Name() +
"' do not match, the expected total elements of shape " + cur_shape.ToString() +
" should be " + std::to_string(max_size) + ", but got " +
std::to_string(cur_shape.NumOfElements());
RETURN_STATUS_UNEXPECTED(err_msg);
}
}

@@ -469,8 +473,8 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor &current_col, const dataeng
// KFloatList can only map to DE types:
// DE_FLOAT32
if (current_col.Type() != DataType::DE_FLOAT32) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be string, but got " + current_col.Type().ToString();
std::string err_msg = "Invalid column type, the column type of " + current_col.Name() +
" should be float32, but got " + current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg);
}

@@ -507,9 +511,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor &current_col, const dat
} else if (current_col.Type() == DataType::DE_INT8) {
RETURN_IF_NOT_OK(LoadIntList<int8_t>(current_col, column_values_list, num_elements, tensor));
} else {
std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.Name() +
", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" +
", but got " + current_col.Type().ToString();
std::string err_msg = "Invalid column type, the column type of " + current_col.Name() +
" should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8, but got " +
current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg);
}

@@ -522,8 +526,8 @@ template <typename T>
Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list,
int32_t *num_elements, std::shared_ptr<Tensor> *tensor) {
if (!(current_col.Type().IsInt())) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be int, but got " + current_col.Type().ToString();
std::string err_msg = "Invalid column type, the column type of " + current_col.Name() + " should be int, but got " +
current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg);
}

@@ -551,8 +555,8 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri
auto realpath = FileUtils::GetRealPath(tf_file.data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << tf_file;
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + tf_file);
MS_LOG(ERROR) << "Invalid file path, " << tf_file << " does not exist.";
RETURN_STATUS_UNEXPECTED("Invalid file path, " + tf_file + " does not exist.");
}

std::ifstream reader;

@@ -572,7 +576,8 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri

dataengine::Example example;
if (!example.ParseFromString(serialized_example)) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse tfrecord file: " + serialized_example);
RETURN_STATUS_UNEXPECTED("Failed to parse tfrecord file: " + realpath.value() +
", fields that failed to parse: " + serialized_example);
}

const dataengine::Features &example_features = example.features();

@@ -587,7 +592,7 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri
for (const auto &curr_col_name : columns_to_load) {
auto it = feature_map.find(curr_col_name);
if (it == feature_map.end()) {
RETURN_STATUS_UNEXPECTED("Invalid data, failed to find column name: " + curr_col_name);
RETURN_STATUS_UNEXPECTED("Invalid columns_list, tfrecord file failed to find column name: " + curr_col_name);
}
std::string column_name = it->first;

@@ -609,10 +614,12 @@ Status TFReaderOp::CreateSchema(const std::string tf_file, std::vector<std::stri
break;

case dataengine::Feature::KindCase::KIND_NOT_SET:
RETURN_STATUS_UNEXPECTED("Invalid data, column type of tf record file must be uint8, int64 or float32.");
RETURN_STATUS_UNEXPECTED("Unrecognized column type, the column type of " + column_name +
" should be uint8, int64 or float32, but got unrecognized column type.");

default:
RETURN_STATUS_UNEXPECTED("Invalid data, column type of tf record file must be uint8, int64 or float32.");
RETURN_STATUS_UNEXPECTED("Unsupported column type, the column type of " + column_name +
" should be uint8, int64 or float32, but got unsupported column type.");
}

RETURN_IF_NOT_OK(

@@ -633,7 +640,9 @@ Status TFReaderOp::CountTotalRows(int64_t *out_total_rows, const std::vector<std
std::vector<std::future<int64_t>> async_results;

if (threads <= 0) {
RETURN_STATUS_UNEXPECTED("Invalid data, the threads of TFReader should be greater than zero, but got zero.");
RETURN_STATUS_UNEXPECTED(
"Invalid threads number, the threads number of TFReader should be greater than zero, but got " +
std::to_string(threads) + ".");
}
int64_t chunk_size = filenames.size() / threads;
int64_t remainder = filenames.size() % threads;

@@ -672,7 +681,7 @@ Status TFReaderOp::CountTotalRows(int64_t *out_total_rows, const std::vector<std
*out_total_rows = total_rows;
} catch (const std::exception &e) {
std::string err_msg = "Unexpected error occurred: ";
err_msg += e.what();
err_msg += std::string(e.what());
RETURN_STATUS_UNEXPECTED(err_msg);
}

@@ -684,7 +693,7 @@ int64_t TFReaderOp::CountTotalRowsSectioned(const std::vector<std::string> &file
for (int i = begin; i < end; i++) {
auto realpath = FileUtils::GetRealPath(filenames[i].data());
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << filenames[i];
MS_LOG(ERROR) << "Invalid file path, " << filenames[i] << " does not exist.";
continue;
}

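Note: the chunk_size/remainder arithmetic above spreads the files across the counting
threads. An equivalent sketch of the same split in Python:

def split_files(filenames, threads):
    # each worker gets chunk_size files; the first `remainder` workers get one extra
    chunk_size, remainder = divmod(len(filenames), threads)
    begin = 0
    for worker in range(threads):
        end = begin + chunk_size + (1 if worker < remainder else 0)
        yield filenames[begin:end]
        begin = end
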
@@ -107,7 +107,7 @@ int64_t USPSOp::CountRows(const std::string &data_file) {
std::ifstream data_file_reader;
data_file_reader.open(data_file, std::ios::in);
if (!data_file_reader.is_open()) {
MS_LOG(ERROR) << "Invalid file, failed to open file: " << data_file;
MS_LOG(ERROR) << "Invalid file, failed to open " << data_file << ": permission denied.";
return 0;
}

@@ -124,7 +124,8 @@ int64_t USPSOp::CountRows(const std::string &data_file) {

Status USPSOp::GetFiles() {
auto real_dataset_dir = FileUtils::GetRealPath(dataset_dir_.data());
CHECK_FAIL_RETURN_UNEXPECTED(real_dataset_dir.has_value(), "Get real path failed: " + dataset_dir_);
CHECK_FAIL_RETURN_UNEXPECTED(real_dataset_dir.has_value(),
"Invalid file path, USPS dataset dir: " + dataset_dir_ + " does not exist.");
Path root_dir(real_dataset_dir.value());

const Path train_file_name("usps");

@@ -144,16 +145,18 @@ Status USPSOp::GetFiles() {

if (use_train) {
Path train_path = root_dir / train_file_name;
CHECK_FAIL_RETURN_UNEXPECTED(train_path.Exists() && !train_path.IsDirectory(),
"Invalid file, failed to find USPS train data file: " + train_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
train_path.Exists() && !train_path.IsDirectory(),
"Invalid file, USPS dataset train file: " + train_path.ToString() + " does not exist or is a directory.");
data_files_list_.emplace_back(train_path.ToString());
MS_LOG(INFO) << "USPS operator found train data file " << train_path.ToString() << ".";
}

if (use_test) {
Path test_path = root_dir / test_file_name;
CHECK_FAIL_RETURN_UNEXPECTED(test_path.Exists() && !test_path.IsDirectory(),
"Invalid file, failed to find USPS test data file: " + test_path.ToString());
CHECK_FAIL_RETURN_UNEXPECTED(
test_path.Exists() && !test_path.IsDirectory(),
"Invalid file, USPS dataset test file: " + test_path.ToString() + " does not exist or is a directory.");
data_files_list_.emplace_back(test_path.ToString());
MS_LOG(INFO) << "USPS operator found test data file " << test_path.ToString() << ".";
}

@@ -163,7 +166,8 @@ Status USPSOp::GetFiles() {
Status USPSOp::LoadFile(const std::string &data_file, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
std::ifstream data_file_reader(data_file);
if (!data_file_reader.is_open()) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + data_file);
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open USPS dataset file: " + data_file +
": permission denied.");
}

int64_t rows_total = 0;

@@ -210,8 +214,8 @@ Status USPSOp::LoadTensor(std::string *line, TensorRow *trow) {
auto images_buffer = std::make_unique<unsigned char[]>(kUSPSImageSize);
auto labels_buffer = std::make_unique<uint32_t[]>(1);
if (images_buffer == nullptr || labels_buffer == nullptr) {
MS_LOG(ERROR) << "Failed to allocate memory for USPS buffer.";
RETURN_STATUS_UNEXPECTED("Failed to allocate memory for USPS buffer.");
MS_LOG(ERROR) << "[Internal ERROR] Failed to allocate memory for USPS buffer.";
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Failed to allocate memory for USPS buffer.");
}

RETURN_IF_NOT_OK(this->ParseLine(line, images_buffer, labels_buffer));

@@ -245,10 +249,12 @@ Status USPSOp::ParseLine(std::string *line, const std::unique_ptr<unsigned char[
} else {
size_t split_pos = item.find(":");

CHECK_FAIL_RETURN_UNEXPECTED(split_pos != std::string::npos, "Invalid data, USPS data file is corrupted.");
CHECK_FAIL_RETURN_UNEXPECTED(split_pos != std::string::npos,
"Invalid data, split character ':' is missing in USPS data file.");
// check pixel index
CHECK_FAIL_RETURN_UNEXPECTED(std::stoi(item.substr(0, split_pos)) == (split_num - 1),
"Invalid data, USPS data file is corrupted.");
"Invalid data, the character before ':' should be " + std::to_string(split_num - 1) +
", but got " + item.substr(0, split_pos) + ".");

std::string pixel_str = item.substr(split_pos + 1, item.length() - split_pos);
// transform the real pixel value from [-1, 1] to the integers within [0, 255]

@@ -257,7 +263,10 @@ Status USPSOp::ParseLine(std::string *line, const std::unique_ptr<unsigned char[
line->erase(0, pos + 1);
}

CHECK_FAIL_RETURN_UNEXPECTED(split_num == (kUSPSImageSize + 1), "Invalid data, USPS data file is corrupted.");
CHECK_FAIL_RETURN_UNEXPECTED(split_num == (kUSPSImageSize + 1),
"Invalid data, the number of fields split by ':' in USPS data file "
"should be " +
std::to_string(kUSPSImageSize + 1) + ", but got " + std::to_string(split_num) + ".");
return Status::OK();
}

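Note: a rough Python sketch of the line format these checks enforce, assuming the
libsvm-style layout "<label> 1:<pix> 2:<pix> ... 256:<pix>" with kUSPSImageSize equal to 256:

def parse_usps_line(line):
    items = line.strip().split(" ")
    label = int(float(items[0]))
    pixels = []
    for expected_idx, item in enumerate(items[1:], start=1):
        idx, value = item.split(":")  # a missing ':' trips the first check above
        if int(idx) != expected_idx:  # a wrong pixel index trips the second check
            raise ValueError("pixel index should be %d, but got %s" % (expected_idx, idx))
        # transform the real pixel value from [-1, 1] to an integer within [0, 255]
        pixels.append(int(round((float(value) + 1.0) / 2.0 * 255)))
    if len(pixels) != 256:  # mirrors the split_num == kUSPSImageSize + 1 check
        raise ValueError("expected 256 pixels, but got %d" % len(pixels))
    return label, pixels
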
|
@ -274,7 +283,7 @@ Status USPSOp::CalculateNumRowsPerShard() {
|
|||
}
|
||||
std::string file_list = ss.str();
|
||||
RETURN_STATUS_UNEXPECTED(
|
||||
"Invalid data, USPSDataset API can't read the data file (interface mismatch or no data found). "
|
||||
"Invalid data, 'USPSDataset' API can't read the data file (interface mismatch or no data found). "
|
||||
"Check file: " +
|
||||
file_list);
|
||||
}
|
||||
|
|
|
@ -118,14 +118,15 @@ Status VOCOp::ParseImageIds() {
|
|||
|
||||
auto realpath = FileUtils::GetRealPath(image_sets_file.data());
|
||||
if (!realpath.has_value()) {
|
||||
MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << image_sets_file;
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + image_sets_file);
|
||||
MS_LOG(ERROR) << "Invalid file path, " << image_sets_file << " does not exist.";
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file path, " + image_sets_file + " does not exist.");
|
||||
}
|
||||
|
||||
std::ifstream in_file;
|
||||
in_file.open(realpath.value());
|
||||
if (in_file.fail()) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + image_sets_file);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid ImageSets file, failed to open ImageSets file: " + image_sets_file +
|
||||
", the file is damaged or permission denied.");
|
||||
}
|
||||
std::string id;
|
||||
while (getline(in_file, id)) {
|
||||
|
@ -187,28 +188,30 @@ Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float
|
|||
std::string invalid_bbox = "{" + std::to_string(static_cast<int>(xmin)) + ", " +
|
||||
std::to_string(static_cast<int>(ymin)) + ", " + std::to_string(static_cast<int>(xmax)) +
|
||||
", " + std::to_string(static_cast<int>(ymax)) + "}";
|
||||
RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid bndbox, the coordinate of bndbox in " + path +
|
||||
" should be greater than 0, but got " + invalid_bbox);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status VOCOp::ParseAnnotationBbox(const std::string &path) {
|
||||
if (!Path(path).Exists()) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file path, " + path + " does not exist.");
|
||||
}
|
||||
Annotation annotation;
|
||||
XMLDocument doc;
|
||||
XMLError e = doc.LoadFile(common::SafeCStr(path));
|
||||
if (e != XMLError::XML_SUCCESS) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file, failed to load xml file: " + path);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load " + path + ": the xml file is damaged or incorrect format.");
|
||||
}
|
||||
XMLElement *root = doc.RootElement();
|
||||
if (root == nullptr) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, failed to load root element for xml file.");
|
||||
RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load root element of " + path +
|
||||
": the format of xml file is incorrect.");
|
||||
}
|
||||
XMLElement *object = root->FirstChildElement("object");
|
||||
if (object == nullptr) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, no object found in " + path);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid xml, the node of object is missing in " + path + ".");
|
||||
}
|
||||
while (object != nullptr) {
|
||||
std::string label_name;
|
||||
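Note: a hedged sketch of the coordinate rule behind this message; the failing case
{321, 121, 421, 120} in test_voc_exception has ymax < ymin. Assuming the C++ condition
requires positive coordinates and a positive-size box:

def bbox_is_valid(xmin, ymin, xmax, ymax):
    # coordinates must be positive and the box must have positive width and height
    return xmin > 0 and ymin > 0 and xmax > xmin and ymax > ymin
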
|
@ -226,7 +229,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) {
|
|||
ParseNodeValue(bbox_node, "ymax", &ymax);
|
||||
RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
|
||||
} else {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid xml, the node of bndbox is missing in " + path);
|
||||
}
|
||||
|
||||
if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 &&
|
||||
|
@ -254,7 +257,8 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co
|
|||
if (decode_ == true) {
|
||||
Status rc = Decode(*tensor, tensor);
|
||||
if (rc.IsError()) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode " + path +
|
||||
": the image is damaged or permission denied.");
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
|
@ -280,7 +284,7 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
|
|||
}
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(
|
||||
item.second.size() == 6,
|
||||
"Invalid parameter, annotation only support 6 parameters, but got " + std::to_string(item.second.size()));
|
||||
"[Internal ERROR] annotation only support 6 parameters, but got " + std::to_string(item.second.size()));
|
||||
|
||||
std::vector<float> tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]};
|
||||
bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end());
|
||||
|
@ -328,8 +332,8 @@ Status VOCOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<in
|
|||
RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
|
||||
if ((*output_class_indexing).empty()) {
|
||||
if (task_type_ != TaskType::Detection) {
|
||||
MS_LOG(ERROR) << "Invalid parameter, GetClassIndexing only valid in \"Detection\" task.";
|
||||
RETURN_STATUS_UNEXPECTED("Invalid parameter, GetClassIndexing only valid in \"Detection\" task.");
|
||||
MS_LOG(ERROR) << "Invalid task, only 'Detection' task support GetClassIndexing.";
|
||||
RETURN_STATUS_UNEXPECTED("Invalid task, only 'Detection' task support GetClassIndexing.");
|
||||
}
|
||||
RETURN_IF_NOT_OK(ParseImageIds());
|
||||
RETURN_IF_NOT_OK(ParseAnnotationIds());
|
||||
|
|
|
@ -49,12 +49,12 @@ YesNoOp::YesNoOp(const std::string &file_dir, int32_t num_workers, int32_t queue
|
|||
Status YesNoOp::PrepareData() {
|
||||
auto realpath = FileUtils::GetRealPath(dataset_dir_.data());
|
||||
if (!realpath.has_value()) {
|
||||
MS_LOG(ERROR) << "Get real path failed, path=" << dataset_dir_;
|
||||
RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + dataset_dir_);
|
||||
MS_LOG(ERROR) << "Invalid file path, " << dataset_dir_ << " does not exist.";
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file path, " + dataset_dir_ + " does not exist.");
|
||||
}
|
||||
Path dir(realpath.value());
|
||||
if (dir.Exists() == false || dir.IsDirectory() == false) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid parameter, failed to open speech commands: " + dataset_dir_);
|
||||
RETURN_STATUS_UNEXPECTED("Invalid directory, " + dataset_dir_ + " does not exist or is not a directory.");
|
||||
}
|
||||
std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir);
|
||||
RETURN_UNEXPECTED_IF_NULL(dir_itr);
|
||||
|
@ -101,8 +101,9 @@ Status YesNoOp::Split(const std::string &line, std::vector<int32_t> *split_num)
|
|||
split_num->emplace_back(stoi(split[i]));
|
||||
}
|
||||
} catch (const std::exception &e) {
|
||||
MS_LOG(ERROR) << "Converting char to int confront with an error in function stoi().";
|
||||
RETURN_STATUS_UNEXPECTED("Converting char to int confront with an error in function stoi().");
|
||||
MS_LOG(ERROR) << "[Internal ERROR] Converting char to int confront with an error in function stoi: " << e.what();
|
||||
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Converting char to int confront with an error in function stoi: " +
|
||||
std::string(e.what()));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ void TakeOp::Print(std::ostream &out, bool show_all) const {
|
|||
}
|
||||
}
|
||||
|
||||
Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
|
||||
Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] TakeOp is an inlined operator."); }
|
||||
|
||||
Status TakeOp::GetNextRow(TensorRow *row) {
|
||||
RETURN_UNEXPECTED_IF_NULL(row);
|
||||
|
|
|
@ -101,9 +101,7 @@ Status ZipOp::ComputeColMap() {
|
|||
int32_t old_id = pair.second;
|
||||
// check if name already exists in column name descriptor
|
||||
if (column_name_id_map_.count(name) == 1) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid parameter, key: " + name +
|
||||
" already exists when zipping datasets. Check for duplicate key names in different "
|
||||
"dataset.");
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, duplicate column " + name + " already exists when zipping datasets.");
|
||||
}
|
||||
column_name_id_map_[name] = old_id + colsCurrent;
|
||||
}
|
||||
|
@ -115,7 +113,7 @@ Status ZipOp::ComputeColMap() {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ZipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
|
||||
Status ZipOp::operator()() { RETURN_STATUS_UNEXPECTED("[Internal ERROR] ZipOp is an inlined operator."); }
|
||||
|
||||
Status ZipOp::GetNextRow(TensorRow *row) {
|
||||
RETURN_UNEXPECTED_IF_NULL(row);
|
||||
|
|
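Note: the duplicate-column check above means column names must be unique across all zipped
datasets. A minimal sketch (column names illustrative):

import numpy as np
import mindspore.dataset as ds

d1 = ds.NumpySlicesDataset({"col1": np.arange(3)}, shuffle=False)
d2 = ds.NumpySlicesDataset({"col2": np.arange(3)}, shuffle=False)
zipped = ds.zip((d1, d2))  # ok; naming both columns "col1" would raise the error above
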
|
@ -210,7 +210,7 @@ def test_cifar10_exception():
|
|||
with pytest.raises(ValueError, match=error_msg_6):
|
||||
ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_parallel_workers=256)
|
||||
|
||||
error_msg_7 = "no .bin files found"
|
||||
error_msg_7 = r"cifar\(.bin\) files are missing"
|
||||
with pytest.raises(RuntimeError, match=error_msg_7):
|
||||
ds1 = ds.Cifar10Dataset(NO_BIN_DIR)
|
||||
for _ in ds1.__iter__():
|
||||
|
@ -360,7 +360,7 @@ def test_cifar100_exception():
|
|||
with pytest.raises(ValueError, match=error_msg_6):
|
||||
ds.Cifar100Dataset(DATA_DIR_100, shuffle=False, num_parallel_workers=256)
|
||||
|
||||
error_msg_7 = "no .bin files found"
|
||||
error_msg_7 = r"cifar\(.bin\) files are missing"
|
||||
with pytest.raises(RuntimeError, match=error_msg_7):
|
||||
ds1 = ds.Cifar100Dataset(NO_BIN_DIR)
|
||||
for _ in ds1.__iter__():
|
||||
|
|
|
@ -300,7 +300,7 @@ def test_coco_case_exception():
|
|||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "required node not found in JSON" in str(e)
|
||||
assert "the attribute of 'images' is missing" in str(e)
|
||||
|
||||
try:
|
||||
data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_CATEGORY_ID_FILE, task="Detection")
|
||||
|
@ -308,7 +308,7 @@ def test_coco_case_exception():
|
|||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "category_id can't find in categories" in str(e)
|
||||
assert "the attribute of 'category_id': 7 is missing" in str(e)
|
||||
|
||||
try:
|
||||
data1 = ds.CocoDataset(DATA_DIR, annotation_file=INVALID_FILE, task="Detection")
|
||||
|
@ -316,7 +316,7 @@ def test_coco_case_exception():
|
|||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "failed to open JSON file" in str(e)
|
||||
assert "Invalid annotation file, Coco Dataset annotation file:" in str(e)
|
||||
|
||||
try:
|
||||
sampler = ds.PKSampler(3)
|
||||
|
|
|
@ -239,7 +239,7 @@ def test_csv_dataset_exception():
|
|||
with pytest.raises(Exception) as err:
|
||||
for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||
pass
|
||||
assert "failed to parse file" in str(err.value)
|
||||
assert "failed to parse" in str(err.value)
|
||||
|
||||
TEST_FILE1 = '../data/dataset/testCSV/quoted.csv'
|
||||
def exception_func(item):
|
||||
|
|
|
@ -359,7 +359,6 @@ def test_emnist_exception():
|
|||
with pytest.raises(RuntimeError, match=error_msg_8):
|
||||
data = ds.EMnistDataset(DATA_DIR, "mnist", "train")
|
||||
data = data.map(operations=vision.Decode(), input_columns=["image"], num_parallel_workers=1)
|
||||
data = data.map(operations=exception_func, input_columns=["image"], num_parallel_workers=1)
|
||||
for _ in data.__iter__():
|
||||
pass
|
||||
with pytest.raises(RuntimeError, match=error_msg_8):
|
||||
|
|
|
@ -638,7 +638,7 @@ def test_generator_error_2():
|
|||
for _ in data1:
|
||||
pass
|
||||
print("========", str(info.value))
|
||||
assert "Generator should return a tuple of NumPy arrays" in str(info.value)
|
||||
assert "'GeneratorDataset' should return a tuple of NumPy arrays" in str(info.value)
|
||||
|
||||
|
||||
def test_generator_error_3():
|
||||
|
@ -663,7 +663,8 @@ def test_generator_error_4():
|
|||
|
||||
for _ in data1:
|
||||
pass
|
||||
assert "Unexpected error. Result of a tensorOp doesn't match output column names" in str(info.value)
|
||||
assert "the number of columns returned in 'map' operations should match the number of 'output_columns'"\
|
||||
in str(info.value)
|
||||
|
||||
|
||||
def test_generator_sequential_sampler():
|
||||
|
|
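Note: for contrast with the failing cases above, a minimal generator that satisfies the
"tuple of NumPy arrays" contract:

import numpy as np
import mindspore.dataset as ds

def gen():
    for i in range(3):
        yield (np.array([i]),)  # a tuple of NumPy arrays, as the message requires

data = ds.GeneratorDataset(gen, ["col1"])
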
|
@ -167,7 +167,7 @@ def test_manifest_dataset_exception():
|
|||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "Invalid data, 'source' is not found in Manifest file" in str(e)
|
||||
assert "Invalid manifest file, 'source' is missing in" in str(e)
|
||||
|
||||
NO_USAGE_DATA_FILE = "../data/dataset/testManifestData/invalidNoUsage.manifest"
|
||||
try:
|
||||
|
@ -176,7 +176,7 @@ def test_manifest_dataset_exception():
|
|||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "Invalid data, 'usage' is not found in Manifest file" in str(e)
|
||||
assert "Invalid manifest file, 'usage' is missing in" in str(e)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -307,7 +307,7 @@ def test_tf_wrong_schema():
|
|||
pass
|
||||
except RuntimeError as e:
|
||||
exception_occurred = True
|
||||
assert "shape in schema's column 'image' is incorrect" in str(e)
|
||||
assert "Data dimensions of 'image' do not match" in str(e)
|
||||
|
||||
assert exception_occurred, "test_tf_wrong_schema failed."
|
||||
|
||||
|
@ -318,7 +318,7 @@ def test_tfrecord_invalid_columns():
|
|||
data = ds.TFRecordDataset(FILES, columns_list=invalid_columns_list)
|
||||
with pytest.raises(RuntimeError) as info:
|
||||
_ = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__()
|
||||
assert "Invalid data, failed to find column name: not_exist" in str(info.value)
|
||||
assert "Invalid columns_list, tfrecord file failed to find column name: not_exist" in str(info.value)
|
||||
|
||||
|
||||
def test_tfrecord_exception():
|
||||
|
|
|
@ -214,12 +214,12 @@ def test_usps_exception():
|
|||
for _ in test_data.__iter__():
|
||||
pass
|
||||
|
||||
error_msg_9 = "failed to find USPS train data file"
|
||||
error_msg_9 = "usps does not exist or is a directory"
|
||||
with pytest.raises(RuntimeError, match=error_msg_9):
|
||||
train_data = ds.USPSDataset(WRONG_DIR, "train")
|
||||
for _ in train_data.__iter__():
|
||||
pass
|
||||
error_msg_10 = "failed to find USPS test data file"
|
||||
error_msg_10 = "usps.t does not exist or is a directory"
|
||||
with pytest.raises(RuntimeError, match=error_msg_10):
|
||||
test_data = ds.USPSDataset(WRONG_DIR, "test")
|
||||
for _ in test_data.__iter__():
|
||||
|
|
|
@ -240,7 +240,7 @@ def test_voc_exception():
|
|||
pass
|
||||
assert False
|
||||
except RuntimeError as e:
|
||||
assert "Invalid bndbox: {321, 121, 421, 120}" in str(e)
|
||||
assert "should be greater than 0, but got {321, 121, 421, 120}" in str(e)
|
||||
|
||||
def exception_func(item):
|
||||
raise Exception("Error occur!")
|
||||
|
|
|
@ -68,6 +68,9 @@ def test_auto_offload():
|
|||
dataset_auto_enabled.create_tuple_iterator(num_epochs=1, output_numpy=True)):
|
||||
np.testing.assert_array_equal(img_0, img_1)
|
||||
|
||||
# Need to turn off here or subsequent test cases will fail.
|
||||
ds.config.set_auto_offload(False)
|
||||
|
||||
|
||||
def test_offload_concat_dataset_1():
|
||||
"""
|
||||
|
|
|
@ -369,9 +369,12 @@ def test_multi_col_map():
|
|||
# test exceptions
|
||||
assert "output_columns with value 233 is not of type" in batch_map_config(2, 2, split_col, ["col2"], 233)
|
||||
assert "column_order with value 233 is not of type" in batch_map_config(2, 2, split_col, ["col2"], ["col1"], 233)
|
||||
assert "output_columns in batch is not set correctly" in batch_map_config(2, 2, split_col, ["col2"], ["col1"])
|
||||
assert "Incorrect number of columns" in batch_map_config(2, 2, split_col, ["col2"], ["col3", "col4", "col5"])
|
||||
assert "col-1 doesn't exist" in batch_map_config(2, 2, split_col, ["col-1"], ["col_x", "col_y"])
|
||||
assert "columns that are not involved in 'per_batch_map' should not be in output_columns"\
|
||||
in batch_map_config(2, 2, split_col, ["col2"], ["col1"])
|
||||
assert "the number of columns returned in 'per_batch_map' function should be 3"\
|
||||
in batch_map_config(2, 2, split_col, ["col2"], ["col3", "col4", "col5"])
|
||||
assert "'col-1' of 'input_columns' doesn't exist"\
|
||||
in batch_map_config(2, 2, split_col, ["col-1"], ["col_x", "col_y"])
|
||||
|
||||
|
||||
def test_exceptions_2():
|
||||
|
@ -379,16 +382,16 @@ def test_exceptions_2():
|
|||
for i in range(num):
|
||||
yield (np.array([i]),)
|
||||
|
||||
def simple_copy(colList, batchInfo):
|
||||
return ([np.copy(arr) for arr in colList],)
|
||||
def simple_copy(col_list, batch_info):
|
||||
return ([np.copy(arr) for arr in col_list],)
|
||||
|
||||
def concat_copy(colList, batchInfo):
|
||||
def concat_copy(col_list, batch_info):
|
||||
# this will duplicate the number of rows returned, which would be wrong!
|
||||
return ([np.copy(arr) for arr in colList] * 2,)
|
||||
return ([np.copy(arr) for arr in col_list] * 2,)
|
||||
|
||||
def shrink_copy(colList, batchInfo):
|
||||
def shrink_copy(col_list, batch_info):
|
||||
# this will duplicate the number of rows returned, which would be wrong!
|
||||
return ([np.copy(arr) for arr in colList][0:int(len(colList) / 2)],)
|
||||
return ([np.copy(arr) for arr in col_list][0:int(len(col_list) / 2)],)
|
||||
|
||||
def test_exceptions_config(gen_num, batch_size, in_cols, per_batch_map):
|
||||
data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).batch(batch_size, input_columns=in_cols,
|
||||
|
@ -401,9 +404,9 @@ def test_exceptions_2():
|
|||
return str(e)
|
||||
|
||||
# test exception where column name is incorrect
|
||||
assert "col:num1 doesn't exist" in test_exceptions_config(4, 2, ["num1"], simple_copy)
|
||||
assert "expects: 2 rows returned from per_batch_map, got: 4" in test_exceptions_config(4, 2, ["num"], concat_copy)
|
||||
assert "expects: 4 rows returned from per_batch_map, got: 2" in test_exceptions_config(4, 4, ["num"], shrink_copy)
|
||||
assert "'num1' of 'input_columns' doesn't exist" in test_exceptions_config(4, 2, ["num1"], simple_copy)
|
||||
assert "expects: 2 rows returned from 'per_batch_map', got: 4" in test_exceptions_config(4, 2, ["num"], concat_copy)
|
||||
assert "expects: 4 rows returned from 'per_batch_map', got: 2" in test_exceptions_config(4, 4, ["num"], shrink_copy)
|
||||
|
||||
|
||||
if __name__ == '__main__':
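
Note: a minimal per_batch_map that keeps the row count unchanged, which is the contract the
reworded "expects: N rows returned from 'per_batch_map'" message enforces (mirrors the test
helpers above):

import numpy as np
import mindspore.dataset as ds

def gen(num):
    for i in range(num):
        yield (np.array([i]),)

def copy_map(col_list, batch_info):
    return ([np.copy(arr) for arr in col_list],)  # rows in == rows out

data = ds.GeneratorDataset((lambda: gen(4)), ["num"]).batch(2, input_columns=["num"], per_batch_map=copy_map)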