forked from mindspore-Ecosystem/mindspore
Temp fix to get rid of decoding png
Removed Api namespace in testcase, not linked for package size removed extra tensor ops Fix Center crop and pad Added file error check, return empty if wrong Added fix to Album Add missing syntax Added wrapper Fix clang Added cpp lint part 2 clang 3 clang 4 Fixed typos 1` Roll back size optimization Added clang fix 5 Lint fix 6 Added Cpp lint fix 7
This commit is contained in:
parent
6a2b3a4ee1
commit
77d2b3c4fc
|
@ -225,20 +225,21 @@ Status AlbumOp::WorkerEntry(int32_t worker_id) {
|
||||||
|
|
||||||
// Only support JPEG/PNG/GIF/BMP
|
// Only support JPEG/PNG/GIF/BMP
|
||||||
// Optimization: Could take in a tensor
|
// Optimization: Could take in a tensor
|
||||||
Status AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
|
// This function does not return status because we want to just skip bad input, not crash
|
||||||
|
bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
|
||||||
std::ifstream file_handle;
|
std::ifstream file_handle;
|
||||||
constexpr int read_num = 3;
|
constexpr int read_num = 3;
|
||||||
*valid = false;
|
*valid = false;
|
||||||
file_handle.open(file_name, std::ios::binary | std::ios::in);
|
file_handle.open(file_name, std::ios::binary | std::ios::in);
|
||||||
if (!file_handle.is_open()) {
|
if (!file_handle.is_open()) {
|
||||||
RETURN_STATUS_UNEXPECTED("Invalid file, can not open image file: " + file_name);
|
return false;
|
||||||
}
|
}
|
||||||
unsigned char file_type[read_num];
|
unsigned char file_type[read_num];
|
||||||
(void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);
|
(void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);
|
||||||
|
|
||||||
if (file_handle.fail()) {
|
if (file_handle.fail()) {
|
||||||
file_handle.close();
|
file_handle.close();
|
||||||
RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name);
|
return false;
|
||||||
}
|
}
|
||||||
file_handle.close();
|
file_handle.close();
|
||||||
if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {
|
if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {
|
||||||
|
@ -246,17 +247,8 @@ Status AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
|
||||||
// JPEG with EXIF stats with \xff\xd8\xff\xe1
|
// JPEG with EXIF stats with \xff\xd8\xff\xe1
|
||||||
// Use \xff\xd8\xff to cover both.
|
// Use \xff\xd8\xff to cover both.
|
||||||
*valid = true;
|
*valid = true;
|
||||||
} else if (file_type[0] == 0x89 && file_type[1] == 0x50 && file_type[2] == 0x4e) {
|
|
||||||
// It's a PNG
|
|
||||||
*valid = true;
|
|
||||||
} else if (file_type[0] == 0x47 && file_type[1] == 0x49 && file_type[2] == 0x46) {
|
|
||||||
// It's a GIF
|
|
||||||
*valid = true;
|
|
||||||
} else if (file_type[0] == 0x42 && file_type[1] == 0x4d) {
|
|
||||||
// It's a BMP
|
|
||||||
*valid = true;
|
|
||||||
}
|
}
|
||||||
return Status::OK();
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) {
|
Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) {
|
||||||
|
@ -264,22 +256,44 @@ Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col
|
||||||
std::ifstream fs;
|
std::ifstream fs;
|
||||||
fs.open(image_file_path, std::ios::binary | std::ios::in);
|
fs.open(image_file_path, std::ios::binary | std::ios::in);
|
||||||
if (fs.fail()) {
|
if (fs.fail()) {
|
||||||
MS_LOG(INFO) << "Image file not found:" << image_file_path << ".";
|
MS_LOG(WARNING) << "File not found:" << image_file_path << ".";
|
||||||
// If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor
|
// If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor
|
||||||
RETURN_STATUS_UNEXPECTED("Invalid file_path, failed to read file: " + image_file_path);
|
RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row));
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
// Hack logic to replace png images with empty tensor
|
||||||
|
Path file(image_file_path);
|
||||||
|
std::set<std::string> png_ext = {".png", ".PNG"};
|
||||||
|
if (png_ext.find(file.Extension()) != png_ext.end()) {
|
||||||
|
// load empty tensor since image is not jpg
|
||||||
|
MS_LOG(INFO) << "PNG!" << image_file_path << ".";
|
||||||
|
RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row));
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
// treat bin files separately
|
||||||
|
std::set<std::string> bin_ext = {".bin", ".BIN"};
|
||||||
|
if (bin_ext.find(file.Extension()) != bin_ext.end()) {
|
||||||
|
// load empty tensor since image is not jpg
|
||||||
|
MS_LOG(INFO) << "Bin file found" << image_file_path << ".";
|
||||||
|
RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
|
||||||
|
row->push_back(std::move(image));
|
||||||
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
MS_LOG(INFO) << "Image file found: " << image_file_path << ".";
|
|
||||||
|
|
||||||
// check that the file is an image before decoding
|
// check that the file is an image before decoding
|
||||||
bool valid = false;
|
bool valid = false;
|
||||||
RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid));
|
bool check_success = CheckImageType(image_file_path, &valid);
|
||||||
|
if (!check_success || !valid) {
|
||||||
|
RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row));
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
// if it is a jpeg image, load and try to decode
|
||||||
RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
|
RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
|
||||||
if (decode_ && valid) {
|
if (decode_ && valid) {
|
||||||
Status rc = Decode(image, &image);
|
Status rc = Decode(image, &image);
|
||||||
if (rc.IsError()) {
|
if (rc.IsError()) {
|
||||||
std::string err = "Invalid data, failed to decode image: " + image_file_path;
|
RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row));
|
||||||
RETURN_STATUS_UNEXPECTED(err);
|
return Status::OK();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
row->push_back(std::move(image));
|
row->push_back(std::move(image));
|
||||||
|
|
|
@ -189,8 +189,8 @@ class AlbumOp : public ParallelOp, public RandomAccessOp {
|
||||||
|
|
||||||
/// \brief Check if image ia valid.Only support JPEG/PNG/GIF/BMP
|
/// \brief Check if image ia valid.Only support JPEG/PNG/GIF/BMP
|
||||||
/// This function could be optimized to return the tensor to reduce open/closing files
|
/// This function could be optimized to return the tensor to reduce open/closing files
|
||||||
/// \return Status - The error code returned
|
/// \return bool - if file is bad then return false
|
||||||
Status CheckImageType(const std::string &file_name, bool *valid);
|
bool CheckImageType(const std::string &file_name, bool *valid);
|
||||||
|
|
||||||
// Base-class override for NodePass visitor acceptor.
|
// Base-class override for NodePass visitor acceptor.
|
||||||
// @param p - Pointer to the NodePass to be accepted.
|
// @param p - Pointer to the NodePass to be accepted.
|
||||||
|
|
|
@ -429,10 +429,11 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
|
||||||
bool ret = Pad(lite_mat_rgb, lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right,
|
bool ret = Pad(lite_mat_rgb, lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right,
|
||||||
PaddBorderType::PADD_BORDER_CONSTANT, fill_r, fill_g, fill_b);
|
PaddBorderType::PADD_BORDER_CONSTANT, fill_r, fill_g, fill_b);
|
||||||
CHECK_FAIL_RETURN_UNEXPECTED(ret, "Pad failed in lite cv");
|
CHECK_FAIL_RETURN_UNEXPECTED(ret, "Pad failed in lite cv");
|
||||||
|
// new shape for output tensor
|
||||||
|
TensorShape new_shape = TensorShape({lite_mat_pad.height_, lite_mat_pad.width_, input->shape()[2]});
|
||||||
std::shared_ptr<Tensor> output_tensor;
|
std::shared_ptr<Tensor> output_tensor;
|
||||||
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(input->shape(), input->type(),
|
RETURN_IF_NOT_OK(
|
||||||
static_cast<uchar *>(lite_mat_pad.data_ptr_), &output_tensor));
|
Tensor::CreateFromMemory(new_shape, input->type(), static_cast<uchar *>(lite_mat_pad.data_ptr_), &output_tensor));
|
||||||
*output = output_tensor;
|
*output = output_tensor;
|
||||||
} catch (std::runtime_error &e) {
|
} catch (std::runtime_error &e) {
|
||||||
RETURN_STATUS_UNEXPECTED("Error in image Pad.");
|
RETURN_STATUS_UNEXPECTED("Error in image Pad.");
|
||||||
|
|
|
@ -175,7 +175,7 @@ if (BUILD_MINDDATA STREQUAL "full")
|
||||||
"${MINDDATA_DIR}/kernels/image/cut_out_op.cc"
|
"${MINDDATA_DIR}/kernels/image/cut_out_op.cc"
|
||||||
"${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc"
|
"${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc"
|
||||||
"${MINDDATA_DIR}/kernels/image/equalize_op.cc"
|
"${MINDDATA_DIR}/kernels/image/equalize_op.cc"
|
||||||
"${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc"
|
"${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc"
|
||||||
"${MINDDATA_DIR}/kernels/image/image_utils.cc"
|
"${MINDDATA_DIR}/kernels/image/image_utils.cc"
|
||||||
"${MINDDATA_DIR}/kernels/image/invert_op.cc"
|
"${MINDDATA_DIR}/kernels/image/invert_op.cc"
|
||||||
"${MINDDATA_DIR}/kernels/image/math_utils.cc"
|
"${MINDDATA_DIR}/kernels/image/math_utils.cc"
|
||||||
|
@ -237,9 +237,9 @@ if (BUILD_MINDDATA STREQUAL "full")
|
||||||
)
|
)
|
||||||
include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache")
|
include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache")
|
||||||
|
|
||||||
# if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64))
|
if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64))
|
||||||
# set(MINDDATA_EXAMPLE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/example/jni-example.cc)
|
set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc)
|
||||||
# endif ()
|
endif ()
|
||||||
|
|
||||||
add_library(minddata-lite SHARED
|
add_library(minddata-lite SHARED
|
||||||
${MINDDATA_API_SRC_FILES}
|
${MINDDATA_API_SRC_FILES}
|
||||||
|
|
|
@ -0,0 +1,451 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "MDToDApi.h"

#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "minddata/dataset/include/datasets.h"
#include "minddata/dataset/include/execute.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/include/vision.h"
#include "minddata/dataset/util/data_helper.h"
#if defined(__ANDROID__) || defined(ANDROID)
#include <android/log.h>
#include <android/asset_manager.h>
#endif
|
||||||
|
|
||||||
|
using mindspore::dataset::Path;
|
||||||
|
using mindspore::dataset::Tensor;
|
||||||
|
|
||||||
|
using mindspore::dataset;
|
||||||
|
|
||||||
|
using mindspore::LogStream;
|
||||||
|
using mindspore::MsLogLevel::DEBUG;
|
||||||
|
using mindspore::MsLogLevel::ERROR;
|
||||||
|
using mindspore::MsLogLevel::INFO;
|
||||||
|
|
||||||
|
using mindspore::dataset::BorderType;
|
||||||
|
using mindspore::dataset::InterpolationMode;
|
||||||
|
using mindspore::dataset::Status;
|
||||||
|
|
||||||
|
class MDToDApi {
|
||||||
|
public:
|
||||||
|
std::shared_ptr<Dataset> _ds;
|
||||||
|
std::shared_ptr<Iterator> _iter;
|
||||||
|
std::vector<std::shared_ptr<TensorOperation>> _augs;
|
||||||
|
std::string _storage_folder;
|
||||||
|
std::string _folder_path;
|
||||||
|
bool _hasBatch;
|
||||||
|
int64_t _file_id;
|
||||||
|
|
||||||
|
MDToDApi() : _ds(nullptr), _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) {
|
||||||
|
MS_LOG(WARNING) << "MDToDAPI Call constructor";
|
||||||
|
}
|
||||||
|
~MDToDApi() {
|
||||||
|
MS_LOG(WARNING) << "MDToDAPI Call destructor";
|
||||||
|
_augs.clear();
|
||||||
|
_ds = nullptr;
|
||||||
|
_iter = nullptr;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// \brief Split a buffer of consecutive NUL-terminated strings into a vector of strings.
/// \param[in] StrBuff Buffer descriptor; Buff points at the bytes and DataSize is the number of valid bytes.
/// \return One entry per string found in the first DataSize bytes; empty when the buffer is null or empty.
std::vector<std::string> MDToDBuffToVector(MDToDBuff_t StrBuff) {
  std::vector<std::string> strVector;
  if (StrBuff.Buff == nullptr || StrBuff.DataSize == 0) {
    return strVector;
  }
  const char *p = static_cast<const char *>(StrBuff.Buff);
  const char *const end = p + StrBuff.DataSize;
  while (p < end) {
    // Search for the terminator inside the buffer only, so that a missing trailing NUL cannot
    // make us read past DataSize.
    const char *terminator = std::find(p, end, '\0');
    strVector.emplace_back(p, terminator);
    if (terminator == end) {
      break;  // last string was not terminated; everything up to the end has been consumed
    }
    p = terminator + 1;
  }
  return strVector;
}
|
||||||
|
|
||||||
|
extern "C" int MDToDApi_pathTest(const char *path) {
|
||||||
|
Path f(path);
|
||||||
|
MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath();
|
||||||
|
// Print out the first few items in the directory
|
||||||
|
auto dir_it = Path::DirIterator::OpenDirectory(&f);
|
||||||
|
MS_LOG(WARNING) << dir_it.get();
|
||||||
|
int i = 0;
|
||||||
|
while (dir_it->hasNext()) {
|
||||||
|
Path v = dir_it->next();
|
||||||
|
MS_LOG(WARNING) << v.toString() << "\n";
|
||||||
|
i++;
|
||||||
|
if (i > 5) break;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Build an Album dataset pipeline from the configuration and wrap it in a new handle.
/// \param[in] MDConf Paths, columns to read, optional resize/crop sizes and optional single file id.
/// \return Heap-allocated handle to be released with MDToDApi_Destroy, or nullptr on any failure.
extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) {
  MS_LOG(WARNING) << "Start createPipeLine";
  // std::string cannot be constructed from a null pointer, so reject it up front.
  if (MDConf.pFolderPath == nullptr || MDConf.pSchemFile == nullptr || MDConf.pStoragePath == nullptr) {
    MS_LOG(ERROR) << "createPipeLine called with a nullptr path in the configuration";
    return nullptr;
  }
  const std::string folder_path(MDConf.pFolderPath);
  const std::string schema_file(MDConf.pSchemFile);
  std::vector<std::string> column_names = MDToDBuffToVector(MDConf.columnsToReadBuff);
  // The id column is always read: GetNext uses it to remember which file the update calls refer to.
  if (std::find(column_names.begin(), column_names.end(), "id") == column_names.end()) {
    MS_LOG(WARNING) << "Column id not foud adding it ";
    column_names.push_back("id");
  }

  std::vector<std::shared_ptr<TensorOperation>> mapOperations;
  if (std::find(column_names.begin(), column_names.end(), "image") != column_names.end()) {
    MS_LOG(WARNING) << "Found column image create map with:";
    MS_LOG(WARNING) << "resize: { " << MDConf.ResizeSizeWH[0] << ", " << MDConf.ResizeSizeWH[1] << " }";
    MS_LOG(WARNING) << "crop: { " << MDConf.CropSizeWH[0] << ", " << MDConf.CropSizeWH[1] << " }";
    MS_LOG(WARNING) << "MEAN: { " << MDConf.MEAN[0] << ", " << MDConf.MEAN[1] << ", " << MDConf.MEAN[2] << " }";
    MS_LOG(WARNING) << "STD: { " << MDConf.STD[0] << ", " << MDConf.STD[1] << ", " << MDConf.STD[2] << " }";

    // A zero in either dimension disables the corresponding operation.
    if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) {
      std::vector<int> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2);
      std::shared_ptr<TensorOperation> resize_op = vision::Resize(Resize);
      if (resize_op == nullptr) {
        MS_LOG(ERROR) << "Failed to create resize operation";
        return nullptr;
      }
      MS_LOG(WARNING) << "Push back resize";
      mapOperations.push_back(resize_op);
    }
    if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) {
      std::vector<int> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2);
      std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop(Crop);
      if (center_crop_op == nullptr) {
        MS_LOG(ERROR) << "Failed to create center crop operation";
        return nullptr;
      }
      MS_LOG(WARNING) << "Push back crop";
      mapOperations.push_back(center_crop_op);
    }
  }

  std::shared_ptr<Dataset> ds = nullptr;
  MS_LOG(INFO) << "Read id =" << MDConf.fileid << " (-1) for all";
  if (MDConf.fileid > -1) {
    // read specific image using SequentialSampler
    ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(MDConf.fileid, 1L));
  } else {
    // read every file in order
    ds = Album(folder_path, schema_file, column_names, true, SequentialSampler());
  }
  // Each stage is checked explicitly: assert() is compiled out under NDEBUG, and the previous code
  // dereferenced ds before its first check.
  if (ds == nullptr) {
    MS_LOG(ERROR) << "Album dataset creation failed";
    return nullptr;
  }
  ds = ds->SetNumWorkers(1);
  if (ds == nullptr) {
    MS_LOG(ERROR) << "SetNumWorkers failed";
    return nullptr;
  }

  // Create a Repeat operation on ds
  const int32_t repeat_num = 1;
  ds = ds->Repeat(repeat_num);
  if (ds == nullptr) {
    MS_LOG(ERROR) << "Repeat failed";
    return nullptr;
  }

  // Create objects for the tensor ops
  MS_LOG(INFO) << " Create pipline parameters";
  MS_LOG(INFO) << "floder path: " << folder_path << " , schema json: " << schema_file;
  MS_LOG(INFO) << "Reading columns:";
  for (const auto &str : column_names) {
    MS_LOG(INFO) << str << " ";
  }
  const bool hasBatch = false;  // no Batch operation is added to this pipeline

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  if (nullptr == iter) {
    MS_LOG(ERROR) << "Iterator creation failed";
    return nullptr;
  }

  // Ownership of the handle passes to the C caller, who must release it with MDToDApi_Destroy.
  MDToDApi *pMDToDApi = new MDToDApi;
  pMDToDApi->_ds = ds;
  pMDToDApi->_iter = iter;
  pMDToDApi->_augs = mapOperations;
  pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath);
  pMDToDApi->_folder_path = folder_path;
  pMDToDApi->_hasBatch = hasBatch;
  return pMDToDApi;
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void MDBuffToVector(MDToDBuff_t MDBuff, std::vector<T> *vec) {
|
||||||
|
vec.clear();
|
||||||
|
if (MDBuff.DataSize > 0) {
|
||||||
|
int nofElements = MDBuff.DataSize / sizeof(T);
|
||||||
|
*vec.assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void GetValue(std::unordered_map<std::string, std::shared_ptr<Tensor>> row, std::string columnName, T *o) {
|
||||||
|
auto column = row[columnName];
|
||||||
|
if (NULL != column) {
|
||||||
|
MS_LOG(INFO) << "Tensor " << columnName << " shape: " << column->shape() << " type: " << column->type()
|
||||||
|
<< " bytes: " << column->SizeInBytes();
|
||||||
|
column->GetItemAt<T>(o, {});
|
||||||
|
MS_LOG(INFO) << columnName << ": " << +*o;
|
||||||
|
} else {
|
||||||
|
MS_LOG(INFO) << "Tensor " << columnName << " Not found"
|
||||||
|
<< ".";
|
||||||
|
*o = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> row, std::string columnName,
|
||||||
|
bool hasBatch, MDToDBuff_t *resBuff) {
|
||||||
|
auto column = row[columnName];
|
||||||
|
resBuff->TensorSize[0] = resBuff->TensorSize[1] = resBuff->TensorSize[2] = resBuff->TensorSize[3] =
|
||||||
|
0; // Mark all dims do not exist in tensor
|
||||||
|
int firstDim = (hasBatch) ? 1 : 0;
|
||||||
|
if (NULL != column) {
|
||||||
|
MS_LOG(INFO) << "Tensor " << columnName << " shape: " << column->shape() << " type: " << column->type()
|
||||||
|
<< " bytes: " << column->SizeInBytes() << "nof elements: " << column->shape()[firstDim];
|
||||||
|
auto tesoreShape = column->shape().AsVector();
|
||||||
|
for (int ix = 0; ix < tesoreShape.size(); ix++) {
|
||||||
|
MS_LOG(INFO) << "Tensor " << columnName << " shape[" << ix << "] = " << tesoreShape[ix];
|
||||||
|
resBuff->TensorSize[ix] = tesoreShape[ix];
|
||||||
|
}
|
||||||
|
if (!hasBatch) {
|
||||||
|
for (int ix = 3; ix > 0; ix--) {
|
||||||
|
resBuff->TensorSize[ix] = resBuff->TensorSize[ix - 1];
|
||||||
|
}
|
||||||
|
resBuff->TensorSize[0] = 1;
|
||||||
|
}
|
||||||
|
if (column->shape()[firstDim] > 0) {
|
||||||
|
if (DataType::DE_STRING == column->type()) {
|
||||||
|
std::string str;
|
||||||
|
for (int ix = 0; ix < column->shape()[firstDim]; ix++) {
|
||||||
|
std::string_view strView;
|
||||||
|
if (hasBatch) {
|
||||||
|
column->GetItemAt(&strView, {0, ix});
|
||||||
|
} else {
|
||||||
|
column->GetItemAt(&strView, {ix});
|
||||||
|
}
|
||||||
|
MS_LOG(INFO) << "string " << columnName << "[" << ix << "]:" << strView << " (size: " << strView.size()
|
||||||
|
<< ")";
|
||||||
|
str.append(strView);
|
||||||
|
str.push_back('\0');
|
||||||
|
}
|
||||||
|
resBuff->DataSize = str.size();
|
||||||
|
errno_t ret = memcpy_s(resBuff->Buff, resBuff->MaxBuffSize, str.data(), resBuff->DataSize);
|
||||||
|
if (ret != 0) {
|
||||||
|
resBuff->DataSize = 0; // memcpy fail amount of data copied is 0
|
||||||
|
MS_LOG(ERROR) << "memcpy_s return: " << ret;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
DataHelper dh;
|
||||||
|
resBuff->DataSize =
|
||||||
|
dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize);
|
||||||
|
}
|
||||||
|
MS_LOG(INFO) << columnName << " " << resBuff->DataSize
|
||||||
|
<< " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") ";
|
||||||
|
if (0 == resBuff->DataSize) {
|
||||||
|
MS_LOG(ERROR) << "Copy Failed!!!! " << columnName << " Too large"
|
||||||
|
<< "."; // memcpy failed
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
MS_LOG(INFO) << "Tensor " << columnName << " is empty (has size 0)";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
MS_LOG(INFO) << "Tensor " << columnName << " was not read.";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Fetch the next row, run the configured image operations on it and copy it into results.
/// \param[in] pMDToDApi Handle returned by MDToDApi_createPipeLine.
/// \param[in,out] results Caller-provided buffers; scalar fields and each buffer's DataSize/TensorSize are filled.
/// \return 0 when a row was produced; -1 on invalid arguments, end of data or a failed image operation.
extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
  MS_LOG(INFO) << "Start GetNext";
  // Fail with an error code instead of assert(): assert is compiled out under NDEBUG and the
  // pointers would then be dereferenced anyway.
  if (pMDToDApi == nullptr || results == nullptr) {
    MS_LOG(ERROR) << "GetNext called with nullptr. Abort";
    return -1;
  }

  // Set defualt
  results->fileid = -1;
  results->embeddingBuff.DataSize = 0;
  results->imageBuff.DataSize = 0;
  MS_LOG(INFO) << "Start GetNext [1]" << pMDToDApi;
  // get next row for dataset
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  if (pMDToDApi->_iter == nullptr) {
    MS_LOG(ERROR) << "GetNext called with no iterator. abort";
    return -1;
  }
  pMDToDApi->_iter->GetNextRow(&row);
  if (row.empty()) {
    return -1;
  }

  // create Execute functions, this replaces Map in Pipeline
  if (!pMDToDApi->_augs.empty()) {
    // Filled before the operations run, so the file name is already in results if one of them fails.
    GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff);
    // for each operation, run eager mode, single threaded operation, will have to memcpy
    // regardless
    for (const auto &aug : pMDToDApi->_augs) {
      // each Execute call will invoke a memcpy, this cannot really be optimized further
      // for this use case, std move is added for fail save.
      row["image"] = Execute(aug)(std::move(row["image"]));
      if (row["image"] == nullptr) {
        // nullptr means that the eager mode image processing failed, we fail in this case
        return -1;
      }
    }
  }
  // FILE ID
  GetValue<int64_t>(row, "id", &results->fileid);
  pMDToDApi->_file_id = results->fileid;  // hold current file id to enable embeddings update (no itr->getCurrent)
  // IS FOR TRAIN
  GetValue<int32_t>(row, "_isForTrain", &results->isForTrain);
  GetValue<int32_t>(row, "_noOfFaces", &results->noOfFaces);
  // String and Tensors
  GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff);
  GetTensorToBuff(row, "image", pMDToDApi->_hasBatch, &results->imageBuff);
  GetTensorToBuff(row, "_embedding", pMDToDApi->_hasBatch, &results->embeddingBuff);
  GetTensorToBuff(row, "label", pMDToDApi->_hasBatch, &results->labelBuff);
  GetTensorToBuff(row, "_boundingBoxes", pMDToDApi->_hasBatch, &results->boundingBoxesBuff);
  GetTensorToBuff(row, "_confidences", pMDToDApi->_hasBatch, &results->confidencesBuff);
  GetTensorToBuff(row, "_landmarks", pMDToDApi->_hasBatch, &results->landmarksBuff);
  GetTensorToBuff(row, "_faceFileNames", pMDToDApi->_hasBatch, &results->faceFileNamesBuff);
  GetTensorToBuff(row, "_imageQualities", pMDToDApi->_hasBatch, &results->imageQualitiesBuff);
  GetTensorToBuff(row, "_faceEmbeddings", pMDToDApi->_hasBatch, &results->faceEmbeddingsBuff);
  return 0;
}
|
||||||
|
|
||||||
|
/// \brief Manually terminate the pipeline behind the handle.
/// \param[in] pMDToDApi Handle returned by MDToDApi_createPipeLine.
/// \return 0 on success, -1 when the handle or its iterator is null.
extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) {
  if (pMDToDApi == nullptr || pMDToDApi->_iter == nullptr) {
    MS_LOG(ERROR) << "Stop called with nullptr handle or no iterator";
    return -1;
  }
  // Manually terminate the pipeline
  pMDToDApi->_iter->Stop();
  MS_LOG(WARNING) << "pipline stoped";
  return 0;
}
|
||||||
|
|
||||||
|
/// \brief Stop the pipeline and free the handle; the handle must not be used afterwards.
/// \param[in] pMDToDApi Handle returned by MDToDApi_createPipeLine.
/// \return 0 on success, -1 when the handle is null.
extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) {
  MS_LOG(WARNING) << "pipeline deleted start";
  if (pMDToDApi == nullptr) {
    MS_LOG(ERROR) << "Destroy called with nullptr handle";
    return -1;
  }
  if (pMDToDApi->_iter != nullptr) {
    pMDToDApi->_iter->Stop();
  }
  delete pMDToDApi;
  MS_LOG(WARNING) << "pipeline deleted end";
  return 0;
}
|
||||||
|
|
||||||
|
/// \brief Compose the path of the JSON file that belongs to the handle's current file id.
/// \param[in] pMDToDApi Handle whose _file_id and _folder_path are used.
/// \param[out] filePath Receives "<folder_path>/<file_id>.json"; left untouched on failure.
/// \return 0 on success, -1 when the current file id is negative.
int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) {
  const int64_t current_id = pMDToDApi->_file_id;
  if (current_id >= 0) {
    *filePath = pMDToDApi->_folder_path + "/" + std::to_string(current_id) + ".json";
    return 0;
  }
  MS_LOG(ERROR) << "Illigal file ID to update: " << current_id << ".";
  return -1;
}
|
||||||
|
|
||||||
|
/// \brief Store an embedding for the current file: write it to a .bin file and record that file in the JSON.
/// \param[in] pMDToDApi Handle; the file updated is the one last returned by MDToDApi_GetNext.
/// \param[in] column JSON key to update; also used as suffix of the .bin file name.
/// \param[in] emmbeddings Pointer to emmbeddingsSize floats.
/// \param[in] emmbeddingsSize Number of floats in emmbeddings.
/// \return 0 on success, -1 on invalid arguments, invalid current file id or I/O failure.
extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings,
                                       size_t emmbeddingsSize) {
  if (pMDToDApi == nullptr || column == nullptr || (emmbeddings == nullptr && emmbeddingsSize > 0)) {
    MS_LOG(ERROR) << "UpdateEmbeding called with nullptr argument";
    return -1;
  }
  const std::string columnName(column);
  MS_LOG(INFO) << "Start update " << columnName;

  // Resolve the JSON path first: it validates the current file id, so no .bin file is written
  // for an invalid id (previously a "-1<column>.bin" file was created before the check failed).
  std::string file_path;
  if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) {
    MS_LOG(ERROR) << "Failed to update " << columnName;
    return -1;
  }

  const std::string converted = std::to_string(pMDToDApi->_file_id);
  const std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin";
  DataHelper dh;
  MS_LOG(INFO) << "Try to save file " << embedding_file_path;
  std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize);
  Status rc = dh.WriteBinFile<float>(embedding_file_path, bin_content);
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Fail to write embedding file: " << embedding_file_path << ".";
    return -1;
  }
  MS_LOG(INFO) << "Saved file " << embedding_file_path;

  MS_LOG(INFO) << "Updating json file: " << file_path;
  rc = dh.UpdateValue(file_path, columnName, embedding_file_path);
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Fail to update json: " << file_path << ".";
    return -1;
  }
  return 0;
}
|
||||||
|
|
||||||
|
/// \brief Replace a string-array column in the current file's JSON.
/// \param[in] pMDToDApi Handle; the file updated is the one last returned by MDToDApi_GetNext.
/// \param[in] column JSON key to update.
/// \param[in] MDbuff Buffer of consecutive NUL-terminated strings (DataSize bytes).
/// \return 0 on success, -1 on invalid arguments, invalid current file id or update failure.
extern "C" int MDToDApi_UpdateStringArray(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff) {
  if (pMDToDApi == nullptr || column == nullptr) {
    MS_LOG(ERROR) << "UpdateStringArray called with nullptr argument";
    return -1;
  }
  const std::string columnName(column);
  std::string file_path;
  if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) {
    MS_LOG(ERROR) << "Failed to update " << columnName;
    return -1;
  }
  MS_LOG(INFO) << "Start Update string array column: " << columnName << " in file " << file_path;
  DataHelper dh;
  // Same parsing this function used to do inline; the shared helper keeps the two copies from drifting.
  const std::vector<std::string> strVec = MDToDBuffToVector(MDbuff);
  Status rc = dh.UpdateArray(file_path, columnName, strVec);
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Fail to update json: " << file_path << ".";
    return -1;
  }
  return 0;
}
|
||||||
|
|
||||||
|
/// \brief Replace a float-array column in the current file's JSON.
/// \param[in] pMDToDApi Handle; the file updated is the one last returned by MDToDApi_GetNext.
/// \param[in] column JSON key to update.
/// \param[in] MDBuff Buffer holding DataSize bytes of packed floats.
/// \return 0 on success, -1 on invalid arguments, invalid current file id or update failure.
extern "C" int MDToDApi_UpdateFloatArray(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDBuff) {
  if (pMDToDApi == nullptr || column == nullptr) {
    MS_LOG(ERROR) << "UpdateFloatArray called with nullptr argument";
    return -1;
  }
  const std::string columnName(column);
  std::string file_path;
  if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) {
    // Message aligned with the sibling update functions (was "Faile to updaet ").
    MS_LOG(ERROR) << "Failed to update " << columnName;
    return -1;
  }
  MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path;
  DataHelper dh;
  std::vector<float> vec;
  MDBuffToVector<float>(MDBuff, &vec);
  Status rc = dh.UpdateArray<float>(file_path, columnName, vec);
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Fail to update json: " << file_path << ".";
    return -1;
  }
  return 0;
}
|
||||||
|
|
||||||
|
/// \brief Set the "_isForTrain" value in the current file's JSON.
/// \param[in] pMDToDApi Handle; the file updated is the one last returned by MDToDApi_GetNext.
/// \param[in] isForTrain Value to store.
/// \return 0 on success, -1 on null handle, invalid current file id or update failure.
extern "C" int MDToDApi_UpdateIsForTrain(MDToDApi *pMDToDApi, int32_t isForTrain) {
  if (pMDToDApi == nullptr) {
    MS_LOG(ERROR) << "UpdateIsForTrain called with nullptr handle";
    return -1;
  }
  MS_LOG(INFO) << "Start Update isForTRain for id: " << pMDToDApi->_file_id << " To " << isForTrain;

  // Shared helper validates the file id and builds "<folder>/<id>.json", as the other update calls do.
  std::string file_path;
  if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) {
    return -1;
  }
  DataHelper dh;
  MS_LOG(INFO) << "Updating file: " << file_path;
  Status rc = dh.UpdateValue<int32_t>(file_path, "_isForTrain", isForTrain, "");
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Fail to update json: " << file_path << ".";
    return -1;
  }
  return 0;
}
|
||||||
|
|
||||||
|
/// \brief Set the "_noOfFaces" value in the current file's JSON.
/// \param[in] pMDToDApi Handle; the file updated is the one last returned by MDToDApi_GetNext.
/// \param[in] noOfFaces Value to store.
/// \return 0 on success, -1 on null handle, invalid current file id or update failure.
extern "C" int MDToDApi_UpdateNoOfFaces(MDToDApi *pMDToDApi, int32_t noOfFaces) {
  if (pMDToDApi == nullptr) {
    MS_LOG(ERROR) << "UpdateNoOfFaces called with nullptr handle";
    return -1;
  }
  MS_LOG(INFO) << "Start Update noOfFaces for id: " << pMDToDApi->_file_id << " To " << noOfFaces;

  // Shared helper validates the file id and builds "<folder>/<id>.json", as the other update calls do.
  std::string file_path;
  if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) {
    return -1;
  }
  DataHelper dh;
  MS_LOG(INFO) << "Updating file: " << file_path;
  Status rc = dh.UpdateValue<int32_t>(file_path, "_noOfFaces", noOfFaces, "");
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Fail to update json: " << file_path << ".";
    return -1;
  }
  return 0;
}
|
|
@ -0,0 +1,70 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef DATASET_MDTODAPI_H_
#define DATASET_MDTODAPI_H_

#include <stddef.h>  // size_t
#include <stdint.h>

// Opaque pipeline handle; created by MDToDApi_createPipeLine and released by MDToDApi_Destroy.
class MDToDApi;

// Caller-owned byte buffer exchanged with the API.
typedef struct MDToDBuff {
  void *Buff;            // start of the buffer
  size_t DataSize;       // number of valid bytes in Buff
  size_t TensorSize[4];  // shape of the tensor stored in Buff; 0 marks a dimension that does not exist
  size_t MaxBuffSize;    // capacity of Buff in bytes
} MDToDBuff_t;

// Configuration passed to MDToDApi_createPipeLine.
typedef struct MDToDConf {
  const char *pFolderPath;        // dataset folder, also holds the <id>.json files
  const char *pSchemFile;         // schema file of the dataset
  const char *pStoragePath;       // folder where embedding .bin files are written
  MDToDBuff_t columnsToReadBuff;  // column names as consecutive NUL-terminated strings
  float MEAN[3];
  float STD[3];
  int ResizeSizeWH[2];  // a 0 in either entry disables resizing
  int CropSizeWH[2];    // a 0 in either entry disables center cropping
  int64_t fileid;       // -1 All files, otherwise get a single specifc file
} MDToDConf_t;

// One row as returned by MDToDApi_GetNext; every buffer is provided by the caller.
typedef struct MDToDResult {
  int64_t fileid;
  int32_t isForTrain;
  int32_t noOfFaces;
  MDToDBuff_t fileNameBuff;
  MDToDBuff_t labelBuff;
  MDToDBuff_t imageBuff;
  MDToDBuff_t embeddingBuff;
  MDToDBuff_t boundingBoxesBuff;
  MDToDBuff_t confidencesBuff;
  MDToDBuff_t landmarksBuff;
  MDToDBuff_t faceFileNamesBuff;
  MDToDBuff_t imageQualitiesBuff;
  MDToDBuff_t faceEmbeddingsBuff;
} MDToDResult_t;

// Function pointer types matching the extern "C" entry points, for callers that resolve them at runtime.
typedef int (*MDToDApi_pathTest_t)(const char *path);
typedef int (*MDToDApi_testAlbum_t)();
typedef MDToDApi *(*MDToDApi_createPipeLine_t)(MDToDConf_t MDConf);
typedef int (*MDToDApi_GetNext_t)(MDToDApi *pMDToDApi, MDToDResult_t *results);
typedef int (*MDToDApi_UpdateEmbeding_t)(MDToDApi *pMDToDApi, const char *column, float *emmbeddings,
                                         size_t emmbeddingsSize);
typedef int (*MDToDApi_UpdateStringArray_t)(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff);
typedef int (*MDToDApi_UpdateFloatArray_t)(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff);
// isForTrain is int32_t to match the definition of MDToDApi_UpdateIsForTrain; the previous uint8_t
// made calls through this pointer type disagree with the function actually called.
typedef int (*MDToDApi_UpdateIsForTrain_t)(MDToDApi *pMDToDApi, int32_t isForTrain);
typedef int (*MDToDApi_UpdateNoOfFaces_t)(MDToDApi *pMDToDApi, int32_t noOfFaces);
typedef int (*MDToDApi_Stop_t)(MDToDApi *pMDToDApi);
typedef int (*MDToDApi_Destroy_t)(MDToDApi *pMDToDApi);

#endif  // DATASET_MDTODAPI_H_
|
|
@ -27,10 +27,10 @@
|
||||||
|
|
||||||
using MSTensor = mindspore::tensor::MSTensor;
|
using MSTensor = mindspore::tensor::MSTensor;
|
||||||
using DETensor = mindspore::tensor::DETensor;
|
using DETensor = mindspore::tensor::DETensor;
|
||||||
using mindspore::dataset::api::vision::Decode;
|
using mindspore::dataset::vision::Decode;
|
||||||
using mindspore::dataset::api::vision::Normalize;
|
using mindspore::dataset::vision::Normalize;
|
||||||
using mindspore::dataset::api::vision::Resize;
|
using mindspore::dataset::vision::Resize;
|
||||||
using Execute = mindspore::dataset::api::Execute;
|
using Execute = mindspore::dataset::Execute;
|
||||||
using Path = mindspore::dataset::Path;
|
using Path = mindspore::dataset::Path;
|
||||||
|
|
||||||
class MindDataTestEager : public mindspore::CommonTest {
|
class MindDataTestEager : public mindspore::CommonTest {
|
||||||
|
|
Loading…
Reference in New Issue