!14661 【MD】fix bug for codespell
From: @xulei2020 Reviewed-by: @pandoublefeng,@heleiwang Signed-off-by: @pandoublefeng
This commit is contained in:
commit
e7661ce8e8
|
@ -286,7 +286,7 @@ elseif(BUILD_MINDDATA STREQUAL "wrapper")
|
|||
${MINDDATA_DIR}/kernels/image/crop_op.cc
|
||||
${MINDDATA_DIR}/kernels/image/normalize_op.cc
|
||||
${MINDDATA_DIR}/kernels/image/resize_op.cc
|
||||
${MINDDATA_DIR}/kernels/image/resize_preserve_ar_op.cc.cc
|
||||
${MINDDATA_DIR}/kernels/image/resize_preserve_ar_op.cc
|
||||
${MINDDATA_DIR}/kernels/image/rgb_to_gray_op.cc
|
||||
${MINDDATA_DIR}/kernels/image/rotate_op.cc
|
||||
${MINDDATA_DIR}/kernels/data/compose_op.cc
|
||||
|
|
|
@ -38,7 +38,7 @@ typedef struct MDToDConf {
|
|||
int ResizeSizeWH[2];
|
||||
int fixOrientation;
|
||||
int CropSizeWH[2];
|
||||
int64_t fileid; // -1 All files, otherwise get a single specifc file
|
||||
int64_t fileid; // -1 All files, otherwise get a single specific file
|
||||
} MDToDConf_t;
|
||||
|
||||
typedef struct MDToDResult {
|
||||
|
|
|
@ -62,7 +62,7 @@ AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string
|
|||
bool StrComp(const std::string &a, const std::string &b) {
|
||||
// returns 1 if string "a" represent a numeric value less than string "b"
|
||||
// the following will always return name, provided there is only one "." character in name
|
||||
// "." character is guaranteed to exist since the extension is checked befor this function call.
|
||||
// "." character is guaranteed to exist since the extension is checked before this function call.
|
||||
int64_t value_a = std::atoi(a.substr(1, a.find(".")).c_str());
|
||||
int64_t value_b = std::atoi(b.substr(1, b.find(".")).c_str());
|
||||
return value_a < value_b;
|
||||
|
@ -385,10 +385,93 @@ Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num,
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
// Load an integer column (scalar or array) of the current record into map_row.
// Columns whose schema type is neither DE_INT64 nor DE_INT32 are left untouched
// and the call still reports success.
// \param[in] index Index of the column in the data schema
// \param[in] is_array True when the json value of the column is an array
// \param[in] column_value Json value of the column
// \param[out] map_row Row map, keyed by column name, that receives the tensor
// \return Status of the underlying loader, or OK when nothing had to be loaded
Status AlbumOp::LoadIntTensorRowByIndex(int index, bool is_array, const nlohmann::json &column_value,
                                        std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
  const bool is_int_column = data_schema_->column(index).type() == DataType::DE_INT64 ||
                             data_schema_->column(index).type() == DataType::DE_INT32;
  if (!is_int_column) {
    // not an integer column: nothing to do here
    return Status::OK();
  }

  TensorPtr loaded;
  if (is_array) {
    // int array
    RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, index, &loaded));
  } else {
    // int value
    RETURN_IF_NOT_OK(LoadIntTensor(column_value, index, &loaded));
  }
  (*map_row)[data_schema_->column(index).name()] = loaded;
  return Status::OK();
}
|
||||
|
||||
// Load the tensor(s) for a single schema column of one record into map_row.
//
// Exactly one loader is applied per column, chosen in this order:
//   1. column named "id"                      -> LoadIDTensor (taken from the file name)
//   2. column name missing from the json      -> LoadEmptyTensor placeholder
//   3. string value, DE_STRING column         -> LoadStringTensor
//   4. array value, DE_STRING column          -> LoadStringArrayTensor
//   5. string value, non-string column        -> LoadImageTensor, plus an extra
//                                                "orientation" scalar entry
//   6. non-array value, float column          -> LoadFloatTensor
//   7. array value, float column              -> LoadFloatArrayTensor
//   8. int column                             -> LoadIntTensorRowByIndex
//   9. anything else                          -> warning, column is skipped
//
// Every handled case returns immediately. This function was extracted from a loop
// body that used `continue` after each case; without the early returns the "id"
// and missing-key cases would fall through to js.at(), which throws for an absent
// key, and a column could be loaded by more than one loader.
//
// \param[in] index Index of the column in the data schema
// \param[in] file File name handed to the id loader
// \param[in] js Json object of the record, looked up by column name
// \param[out] map_row Row map, keyed by column name, that receives the tensor(s)
// \return Status of the loader that ran, or OK when the column was skipped
Status AlbumOp::LoadTensorRowByIndex(int index, const std::string &file, const nlohmann::json &js,
                                     std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
  const auto &column = data_schema_->column(index);
  const auto &col_name = column.name();
  const auto &col_type = column.type();

  // special case to handle
  if (col_name == "id") {
    // id is internal, special case to load from file
    TensorPtr tensor;
    RETURN_IF_NOT_OK(LoadIDTensor(file, index, &tensor));
    (*map_row)[col_name] = tensor;
    return Status::OK();
  }

  // find if key does not exist, insert placeholder if not found
  if (js.find(col_name) == js.end()) {
    MS_LOG(INFO) << "Pushing empty tensor for column: " << col_name << ".";
    TensorPtr tensor;
    RETURN_IF_NOT_OK(LoadEmptyTensor(index, &tensor));
    (*map_row)[col_name] = tensor;
    // must not reach js.at() below: the key is known to be absent
    return Status::OK();
  }

  const nlohmann::json &column_value = js.at(col_name);
  MS_LOG(INFO) << "This column is: " << col_name << ".";
  const bool is_array = column_value.is_array();
  const bool is_string_value = column_value.is_string();
  const bool is_string_column = (col_type == DataType::DE_STRING);
  const bool is_float_column = (col_type == DataType::DE_FLOAT32 || col_type == DataType::DE_FLOAT64);
  const bool is_int_column = (col_type == DataType::DE_INT64 || col_type == DataType::DE_INT32);

  // load single string
  if (is_string_value && is_string_column) {
    TensorPtr tensor;
    RETURN_IF_NOT_OK(LoadStringTensor(column_value, index, &tensor));
    (*map_row)[col_name] = tensor;
    return Status::OK();
  }

  // load string array
  if (is_array && is_string_column) {
    TensorPtr tensor;
    RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, index, &tensor));
    (*map_row)[col_name] = tensor;
    return Status::OK();
  }

  // load image file: a string value in a non-string column is treated as an image path
  if (is_string_value && !is_string_column) {
    std::string image_file_path = column_value;
    TensorPtr tensor;
    RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, index, &tensor));
    (*map_row)[col_name] = tensor;
    // the image orientation is stored next to the image under a fixed key
    uint32_t orientation = GetOrientation(image_file_path);
    TensorPtr scalar_tensor;
    RETURN_IF_NOT_OK(Tensor::CreateScalar<uint32_t>(orientation, &scalar_tensor));
    (*map_row)["orientation"] = scalar_tensor;
    return Status::OK();
  }

  // load float value / float array
  if (is_float_column) {
    TensorPtr tensor;
    if (is_array) {
      RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, index, &tensor));
    } else {
      RETURN_IF_NOT_OK(LoadFloatTensor(column_value, index, &tensor));
    }
    (*map_row)[col_name] = tensor;
    return Status::OK();
  }

  // load int value / int array
  if (is_int_column) {
    return LoadIntTensorRowByIndex(index, is_array, column_value, map_row);
  }

  MS_LOG(WARNING) << "Value type for column: " << col_name << " is not supported.";
  return Status::OK();
}
|
||||
|
||||
// Load 1 TensorRow (image,label) using 1 ImageColumns. 1 function call produces 1 TensorRow in a DataBuffer
|
||||
// possible optimization: the helper functions of LoadTensorRow should be optimized
|
||||
// to take a reference to a column descriptor?
|
||||
// the design of this class is to make the code more readable, forgoing minor perfomance gain like
|
||||
// the design of this class is to make the code more readable, forgoing minor performance gain like
|
||||
// getting rid of duplicated checks
|
||||
Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
|
||||
std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
|
||||
|
@ -414,87 +497,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
|
|||
if (!IsReadColumn(data_schema_->column(i).name())) {
|
||||
continue;
|
||||
}
|
||||
// special case to handle
|
||||
if (data_schema_->column(i).name() == "id") {
|
||||
// id is internal, special case to load from file
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
continue;
|
||||
}
|
||||
// find if key does not exist, insert placeholder nullptr if not found
|
||||
if (js.find(data_schema_->column(i).name()) == js.end()) {
|
||||
// iterator not found, push nullptr as placeholder
|
||||
MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
continue;
|
||||
}
|
||||
nlohmann::json column_value = js.at(data_schema_->column(i).name());
|
||||
MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
|
||||
bool is_array = column_value.is_array();
|
||||
// load single string
|
||||
if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
continue;
|
||||
}
|
||||
// load string array
|
||||
if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
continue;
|
||||
}
|
||||
// load image file
|
||||
if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
|
||||
std::string image_file_path = column_value;
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
uint32_t orientation = GetOrientation(image_file_path);
|
||||
TensorPtr scalar_tensor;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateScalar<uint32_t>(orientation, &scalar_tensor));
|
||||
(*map_row)["orientation"] = scalar_tensor;
|
||||
continue;
|
||||
}
|
||||
// load float value
|
||||
if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
|
||||
data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
continue;
|
||||
}
|
||||
// load float array
|
||||
if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
|
||||
data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
continue;
|
||||
}
|
||||
// int value
|
||||
if (!is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
|
||||
data_schema_->column(i).type() == DataType::DE_INT32)) {
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
continue;
|
||||
}
|
||||
// int array
|
||||
if (is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
|
||||
data_schema_->column(i).type() == DataType::DE_INT32)) {
|
||||
TensorPtr tensor;
|
||||
RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor));
|
||||
(*map_row)[data_schema_->column(i).name()] = tensor;
|
||||
continue;
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported.";
|
||||
continue;
|
||||
}
|
||||
RETURN_IF_NOT_OK(LoadTensorRowByIndex(i, file, js, map_row));
|
||||
}
|
||||
} catch (const std::exception &err) {
|
||||
file_handle.close();
|
||||
|
|
|
@ -166,6 +166,12 @@ class AlbumOp {
|
|||
/// \param[in] column_name
|
||||
bool IsReadColumn(const std::string &column_name);
|
||||
|
||||
/// \brief Load the tensor(s) for one schema column of a record into the row map
/// \param[in] index Index of the column in the data schema
/// \param[in] file File name handed to the id loader when the column is named "id"
/// \param[in] js Json object of the record, looked up by column name
/// \param[out] map_row Map from column name to the loaded tensor
/// \return Status The status code returned
Status LoadTensorRowByIndex(int index, const std::string &file, const nlohmann::json &js,
|
||||
std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);
|
||||
|
||||
/// \brief Load a DE_INT32 / DE_INT64 column (single value or array) into the row map
/// \param[in] index Index of the column in the data schema
/// \param[in] is_array Whether the json value of the column is an array
/// \param[in] column_value Json value of the column
/// \param[out] map_row Map from column name to the loaded tensor
/// \return Status The status code returned
Status LoadIntTensorRowByIndex(int index, bool is_array, const nlohmann::json &column_value,
|
||||
std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);
|
||||
|
||||
std::string folder_path_; // directory of image folder
|
||||
bool decode_;
|
||||
std::vector<std::string> columns_to_load_;
|
||||
|
|
Loading…
Reference in New Issue