From 77d2b3c4fc70917f2a55fa86c2d731519b10d7cf Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 11 Nov 2020 22:35:35 -0500 Subject: [PATCH] Temp fix to get rid of decoding png Removed Api namespace in testcase, not linked for package size removed extra tensor ops Fix Center crop and pad Added file error check, return emtpy if wrong Added fix to Album Add missing syntax Added wrapper Fix clang Added cpp lint part 2 clang 3 clang 4 Fixed typos 1` Roll back size optimization Added clang fix 5 Lint fix 6 Added Cpp lint fix 7 --- .../engine/datasetops/source/album_op.cc | 54 ++- .../engine/datasetops/source/album_op.h | 4 +- .../dataset/kernels/image/lite_image_utils.cc | 7 +- mindspore/lite/minddata/CMakeLists.txt | 8 +- mindspore/lite/minddata/wrapper/MDToDApi.cc | 451 ++++++++++++++++++ mindspore/lite/minddata/wrapper/MDToDApi.h | 70 +++ .../{example => wrapper}/jni-example.cc | 0 .../testCifar10Data/data_batch_1.bin | Bin .../{example => wrapper}/x86-example.cc | 0 .../lite/test/ut/src/dataset/eager_test.cc | 8 +- 10 files changed, 569 insertions(+), 33 deletions(-) create mode 100644 mindspore/lite/minddata/wrapper/MDToDApi.cc create mode 100644 mindspore/lite/minddata/wrapper/MDToDApi.h rename mindspore/lite/minddata/{example => wrapper}/jni-example.cc (100%) rename mindspore/lite/minddata/{example => wrapper}/testCifar10Data/data_batch_1.bin (100%) rename mindspore/lite/minddata/{example => wrapper}/x86-example.cc (100%) diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc index 65c2cdde30b..65be9f1be25 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc @@ -225,20 +225,21 @@ Status AlbumOp::WorkerEntry(int32_t worker_id) { // Only support JPEG/PNG/GIF/BMP // Optimization: Could take in a tensor -Status AlbumOp::CheckImageType(const std::string &file_name, bool 
*valid) { +// This function does not return status because we want to just skip bad input, not crash +bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { std::ifstream file_handle; constexpr int read_num = 3; *valid = false; file_handle.open(file_name, std::ios::binary | std::ios::in); if (!file_handle.is_open()) { - RETURN_STATUS_UNEXPECTED("Invalid file, can not open image file: " + file_name); + return false; } unsigned char file_type[read_num]; (void)file_handle.read(reinterpret_cast(file_type), read_num); if (file_handle.fail()) { file_handle.close(); - RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name); + return false; } file_handle.close(); if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) { @@ -246,17 +247,8 @@ Status AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { // JPEG with EXIF stats with \xff\xd8\xff\xe1 // Use \xff\xd8\xff to cover both. *valid = true; - } else if (file_type[0] == 0x89 && file_type[1] == 0x50 && file_type[2] == 0x4e) { - // It's a PNG - *valid = true; - } else if (file_type[0] == 0x47 && file_type[1] == 0x49 && file_type[2] == 0x46) { - // It's a GIF - *valid = true; - } else if (file_type[0] == 0x42 && file_type[1] == 0x4d) { - // It's a BMP - *valid = true; } - return Status::OK(); + return true; } Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) { @@ -264,22 +256,44 @@ Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col std::ifstream fs; fs.open(image_file_path, std::ios::binary | std::ios::in); if (fs.fail()) { - MS_LOG(INFO) << "Image file not found:" << image_file_path << "."; + MS_LOG(WARNING) << "File not found:" << image_file_path << "."; // If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor - RETURN_STATUS_UNEXPECTED("Invalid file_path, failed to read file: " + image_file_path); + 
RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row)); + return Status::OK(); + } + // Hack logic to replace png images with empty tensor + Path file(image_file_path); + std::set png_ext = {".png", ".PNG"}; + if (png_ext.find(file.Extension()) != png_ext.end()) { + // load empty tensor since image is not jpg + MS_LOG(INFO) << "PNG!" << image_file_path << "."; + RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row)); + return Status::OK(); + } + // treat bin files separately + std::set bin_ext = {".bin", ".BIN"}; + if (bin_ext.find(file.Extension()) != bin_ext.end()) { + // bin files are not images: load the file contents as-is into the tensor and skip type check and decode + MS_LOG(INFO) << "Bin file found" << image_file_path << "."; + RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image)); + row->push_back(std::move(image)); + return Status::OK(); } - - MS_LOG(INFO) << "Image file found: " << image_file_path << "."; // check that the file is an image before decoding bool valid = false; - RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid)); + bool check_success = CheckImageType(image_file_path, &valid); + if (!check_success || !valid) { + RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row)); + return Status::OK(); + } + // if it is a jpeg image, load and try to decode RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image)); if (decode_ && valid) { Status rc = Decode(image, &image); if (rc.IsError()) { - std::string err = "Invalid data, failed to decode image: " + image_file_path; - RETURN_STATUS_UNEXPECTED(err); + RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row)); + return Status::OK(); } } row->push_back(std::move(image)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h index f3589099ed7..6d0c9604094 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h @@ -189,8 +189,8 @@ class AlbumOp : public ParallelOp,
public RandomAccessOp { /// \brief Check if image ia valid.Only support JPEG/PNG/GIF/BMP /// This function could be optimized to return the tensor to reduce open/closing files - /// \return Status - The error code returned - Status CheckImageType(const std::string &file_name, bool *valid); + /// \return bool - if file is bad then return false + bool CheckImageType(const std::string &file_name, bool *valid); // Base-class override for NodePass visitor acceptor. // @param p - Pointer to the NodePass to be accepted. diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc index b6d50a955ff..f847e402306 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc @@ -429,10 +429,11 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output bool ret = Pad(lite_mat_rgb, lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right, PaddBorderType::PADD_BORDER_CONSTANT, fill_r, fill_g, fill_b); CHECK_FAIL_RETURN_UNEXPECTED(ret, "Pad failed in lite cv"); - + // new shape for output tensor + TensorShape new_shape = TensorShape({lite_mat_pad.height_, lite_mat_pad.width_, input->shape()[2]}); std::shared_ptr output_tensor; - RETURN_IF_NOT_OK(Tensor::CreateFromMemory(input->shape(), input->type(), - static_cast(lite_mat_pad.data_ptr_), &output_tensor)); + RETURN_IF_NOT_OK( + Tensor::CreateFromMemory(new_shape, input->type(), static_cast(lite_mat_pad.data_ptr_), &output_tensor)); *output = output_tensor; } catch (std::runtime_error &e) { RETURN_STATUS_UNEXPECTED("Error in image Pad."); diff --git a/mindspore/lite/minddata/CMakeLists.txt b/mindspore/lite/minddata/CMakeLists.txt index 7e312147b49..77d3bded7e7 100644 --- a/mindspore/lite/minddata/CMakeLists.txt +++ b/mindspore/lite/minddata/CMakeLists.txt @@ -175,7 +175,7 @@ if (BUILD_MINDDATA STREQUAL "full") 
"${MINDDATA_DIR}/kernels/image/cut_out_op.cc" "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" "${MINDDATA_DIR}/kernels/image/equalize_op.cc" - "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" + "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" "${MINDDATA_DIR}/kernels/image/image_utils.cc" "${MINDDATA_DIR}/kernels/image/invert_op.cc" "${MINDDATA_DIR}/kernels/image/math_utils.cc" @@ -237,9 +237,9 @@ if (BUILD_MINDDATA STREQUAL "full") ) include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache") -# if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) -# set(MINDDATA_EXAMPLE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/example/jni-example.cc) -# endif () + if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) + set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) + endif () add_library(minddata-lite SHARED ${MINDDATA_API_SRC_FILES} diff --git a/mindspore/lite/minddata/wrapper/MDToDApi.cc b/mindspore/lite/minddata/wrapper/MDToDApi.cc new file mode 100644 index 00000000000..b914568d7e5 --- /dev/null +++ b/mindspore/lite/minddata/wrapper/MDToDApi.cc @@ -0,0 +1,451 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "MDToDApi.h" + +#include +#include +#include +#include +#include +#include +#include +#include "minddata/dataset/include/datasets.h" +#include "minddata/dataset/include/execute.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/include/vision.h" +#include "minddata/dataset/util/data_helper.h" +#if defined(__ANDROID__) || defined(ANDROID) +#include +#include +#endif + +using mindspore::dataset::Path; +using mindspore::dataset::Tensor; + +using namespace mindspore::dataset;  // NOLINT(build/namespaces) a namespace needs a using-directive + +using mindspore::LogStream; +using mindspore::MsLogLevel::DEBUG; +using mindspore::MsLogLevel::ERROR; +using mindspore::MsLogLevel::INFO; + +using mindspore::dataset::BorderType; +using mindspore::dataset::InterpolationMode; +using mindspore::dataset::Status; + +class MDToDApi { + public: + std::shared_ptr _ds; + std::shared_ptr _iter; + std::vector> _augs; + std::string _storage_folder; + std::string _folder_path; + bool _hasBatch; + int64_t _file_id; + + MDToDApi() : _ds(nullptr), _iter(nullptr), _augs({}), _storage_folder(""), _hasBatch(false), _file_id(-1) { + MS_LOG(WARNING) << "MDToDAPI Call constructor"; + } + ~MDToDApi() { + MS_LOG(WARNING) << "MDToDAPI Call destructor"; + _augs.clear(); + _ds = nullptr; + _iter = nullptr; + } +}; + +std::vector MDToDBuffToVector(MDToDBuff_t StrBuff) { + std::vector strVector; + if (StrBuff.DataSize > 0) { + const char *p = reinterpret_cast(StrBuff.Buff); + do { + strVector.push_back(std::string(p)); + p += strVector.back().size() + 1; + } while (p < reinterpret_cast(StrBuff.Buff) + StrBuff.DataSize); + } + return strVector; +} + +extern "C" int MDToDApi_pathTest(const char *path) { + Path f(path); + MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath(); + // Print out the first few items in the directory + auto dir_it = Path::DirIterator::OpenDirectory(&f); + MS_LOG(WARNING) << dir_it.get(); + int i = 0; + while (dir_it != nullptr && dir_it->hasNext()) {  // skip the listing if the directory was not opened + Path v = dir_it->next(); + MS_LOG(WARNING) << v.toString() << "\n"; + i++; + if
(i > 5) break; + } + return 0; +} + +extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) { + MS_LOG(WARNING) << "Start createPipeLine"; + std::string folder_path(MDConf.pFolderPath); + std::string schema_file(MDConf.pSchemFile); + std::vector column_names = MDToDBuffToVector(MDConf.columnsToReadBuff); + if (std::find(column_names.begin(), column_names.end(), "id") == column_names.end()) { + MS_LOG(WARNING) << "Column id not foud adding it "; + column_names.push_back("id"); + } + std::vector> mapOperations; + if (std::find(column_names.begin(), column_names.end(), "image") != column_names.end()) { + MS_LOG(WARNING) << "Found column image create map with:"; + MS_LOG(WARNING) << "resize: { " << MDConf.ResizeSizeWH[0] << ", " << MDConf.ResizeSizeWH[1] << " }"; + MS_LOG(WARNING) << "crop: { " << MDConf.CropSizeWH[0] << ", " << MDConf.CropSizeWH[1] << " }"; + MS_LOG(WARNING) << "MEAN: { " << MDConf.MEAN[0] << ", " << MDConf.MEAN[1] << ", " << MDConf.MEAN[2] << " }"; + MS_LOG(WARNING) << "STD: { " << MDConf.STD[0] << ", " << MDConf.STD[1] << ", " << MDConf.STD[2] << " }"; + + if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) { + std::vector Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2); + std::shared_ptr resize_op = vision::Resize(Resize); + assert(resize_op != nullptr); + MS_LOG(WARNING) << "Push back resize"; + mapOperations.push_back(resize_op); + } + if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) { + std::vector Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2); + std::shared_ptr center_crop_op = vision::CenterCrop(Crop); + assert(center_crop_op != nullptr); + MS_LOG(WARNING) << "Push back crop"; + mapOperations.push_back(center_crop_op); + } + } + std::shared_ptr ds = nullptr; + MS_LOG(INFO) << "Read id =" << MDConf.fileid << " (-1) for all"; + if (MDConf.fileid > -1) { + // read specific image using SequentialSampler + ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(MDConf.fileid, 1L)); + 
} else { + // No specific file requested: read every file in order with a default SequentialSampler + ds = Album(folder_path, schema_file, column_names, true, SequentialSampler()); + } + ds = ds->SetNumWorkers(1); + + assert(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 1; + ds = ds->Repeat(repeat_num); + assert(ds != nullptr); + + // Create objects for the tensor ops + MS_LOG(INFO) << " Create pipline parameters"; + MS_LOG(INFO) << "floder path: " << folder_path << " , schema json: " << schema_file; + MS_LOG(INFO) << "Reading columns:"; + for (auto str : column_names) { + MS_LOG(INFO) << str << " "; + } + bool hasBatch = false; + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + if (nullptr == iter) { + MS_LOG(ERROR) << "Iterator creation failed"; + return nullptr; + } + assert(iter != nullptr); + MDToDApi *pMDToDApi = new MDToDApi; + pMDToDApi->_ds = ds; + pMDToDApi->_iter = iter; + pMDToDApi->_augs = mapOperations; + pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath); + pMDToDApi->_folder_path = folder_path; + pMDToDApi->_hasBatch = hasBatch; + return pMDToDApi; +} + +template +void MDBuffToVector(MDToDBuff_t MDBuff, std::vector *vec) { + vec->clear();  // vec is a pointer: members must be reached through -> + if (MDBuff.DataSize > 0) { + int nofElements = MDBuff.DataSize / sizeof(T); + vec->assign(reinterpret_cast(MDBuff.Buff), reinterpret_cast(MDBuff.Buff) + nofElements); + } +} + +template +void GetValue(std::unordered_map> row, std::string columnName, T *o) { + auto column = row[columnName]; + if (NULL != column) { + MS_LOG(INFO) << "Tensor " << columnName << " shape: " << column->shape() << " type: " << column->type() + << " bytes: " << column->SizeInBytes(); + column->GetItemAt(o, {}); + MS_LOG(INFO) << columnName << ": " << +*o; + } else { + MS_LOG(INFO) << "Tensor " << columnName << " Not found" + << "."; + *o = 0; + } +} + +void GetTensorToBuff(std::unordered_map> row,
std::string columnName, + bool hasBatch, MDToDBuff_t *resBuff) { + auto column = row[columnName]; + resBuff->TensorSize[0] = resBuff->TensorSize[1] = resBuff->TensorSize[2] = resBuff->TensorSize[3] = + 0; // Mark all dims do not exist in tensor + int firstDim = (hasBatch) ? 1 : 0; + if (NULL != column) { + MS_LOG(INFO) << "Tensor " << columnName << " shape: " << column->shape() << " type: " << column->type() + << " bytes: " << column->SizeInBytes() << "nof elements: " << column->shape()[firstDim]; + auto tesoreShape = column->shape().AsVector(); + for (size_t ix = 0; ix < tesoreShape.size() && ix < 4; ix++) {  // TensorSize has only 4 slots + MS_LOG(INFO) << "Tensor " << columnName << " shape[" << ix << "] = " << tesoreShape[ix]; + resBuff->TensorSize[ix] = tesoreShape[ix]; + } + if (!hasBatch) { + for (int ix = 3; ix > 0; ix--) { + resBuff->TensorSize[ix] = resBuff->TensorSize[ix - 1]; + } + resBuff->TensorSize[0] = 1; + } + if (column->shape()[firstDim] > 0) { + if (DataType::DE_STRING == column->type()) { + std::string str; + for (int ix = 0; ix < column->shape()[firstDim]; ix++) { + std::string_view strView; + if (hasBatch) { + column->GetItemAt(&strView, {0, ix}); + } else { + column->GetItemAt(&strView, {ix}); + } + MS_LOG(INFO) << "string " << columnName << "[" << ix << "]:" << strView << " (size: " << strView.size() + << ")"; + str.append(strView); + str.push_back('\0'); + } + resBuff->DataSize = str.size(); + errno_t ret = memcpy_s(resBuff->Buff, resBuff->MaxBuffSize, str.data(), resBuff->DataSize); + if (ret != 0) { + resBuff->DataSize = 0; // memcpy fail amount of data copied is 0 + MS_LOG(ERROR) << "memcpy_s return: " << ret; + } + } else { + DataHelper dh; + resBuff->DataSize = + dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize); + } + MS_LOG(INFO) << columnName << " " << resBuff->DataSize + << " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") "; + if (0 == resBuff->DataSize) { + MS_LOG(ERROR) << "Copy Failed!!!! " << columnName << " Too large" + << "."; // memcpy failed + } + } else { + MS_LOG(INFO) << "Tensor " << columnName << " is empty (has size 0)"; + } + } else { + MS_LOG(INFO) << "Tensor " << columnName << " was not read."; + } +} + +extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { + MS_LOG(INFO) << "Start GetNext"; + if (pMDToDApi == nullptr || results == nullptr) { + MS_LOG(ERROR) << "GetNext called with nullptr. Abort"; + return -1;  // assert is compiled out under NDEBUG, so fail explicitly instead of dereferencing null + } + + // Set default + results->fileid = -1; + results->embeddingBuff.DataSize = 0; + results->imageBuff.DataSize = 0; + MS_LOG(INFO) << "Start GetNext [1]" << pMDToDApi; + // get next row for dataset + std::unordered_map> row; + if (pMDToDApi->_iter == nullptr) { + MS_LOG(ERROR) << "GetNext called with no iterator. abort"; + return -1; + } + // create Execute functions, this replaces Map in Pipeline + pMDToDApi->_iter->GetNextRow(&row); + if (row.size() != 0) { + if ((pMDToDApi->_augs).size() > 0) { + // String and Tensors + GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff); + // for each operation, run eager mode, single threaded operation, will have to memcpy + // regardless + for (int i = 0; i < (pMDToDApi->_augs).size(); i++) { + // each Execute call will invoke a memcpy, this cannot really be optimized further + // for this use case, std move is added as a fail safe.
+ row["image"] = Execute((pMDToDApi->_augs)[i])(std::move(row["image"])); + if (row["image"] == nullptr) { + // nullptr means that the eager mode image processing failed, we fail in this case + return -1; + } + } + } + // FILE ID + GetValue(row, "id", &results->fileid); + pMDToDApi->_file_id = results->fileid; // hold current file id to enable embeddings update (no itr->getCurrent) + // IS FOR TRAIN + GetValue(row, "_isForTrain", &results->isForTrain); + GetValue(row, "_noOfFaces", &results->noOfFaces); + // String and Tensors + GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff); + GetTensorToBuff(row, "image", pMDToDApi->_hasBatch, &results->imageBuff); + GetTensorToBuff(row, "_embedding", pMDToDApi->_hasBatch, &results->embeddingBuff); + GetTensorToBuff(row, "label", pMDToDApi->_hasBatch, &results->labelBuff); + GetTensorToBuff(row, "_boundingBoxes", pMDToDApi->_hasBatch, &results->boundingBoxesBuff); + GetTensorToBuff(row, "_confidences", pMDToDApi->_hasBatch, &results->confidencesBuff); + GetTensorToBuff(row, "_landmarks", pMDToDApi->_hasBatch, &results->landmarksBuff); + GetTensorToBuff(row, "_faceFileNames", pMDToDApi->_hasBatch, &results->faceFileNamesBuff); + GetTensorToBuff(row, "_imageQualities", pMDToDApi->_hasBatch, &results->imageQualitiesBuff); + GetTensorToBuff(row, "_faceEmbeddings", pMDToDApi->_hasBatch, &results->faceEmbeddingsBuff); + return 0; + } + return -1; +} + +extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) { + // Manually terminate the pipeline + pMDToDApi->_iter->Stop(); + MS_LOG(WARNING) << "pipline stoped"; + return 0; +} + +extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) { + MS_LOG(WARNING) << "pipeline deleted start"; + pMDToDApi->_iter->Stop(); + delete pMDToDApi; + MS_LOG(WARNING) << "pipeline deleted end"; + return 0; +} + +int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) { + int64_t file_id = pMDToDApi->_file_id; + if (file_id < 0) { + MS_LOG(ERROR) << "Illigal file ID 
to update: " << file_id << "."; + return -1; + } + std::string converted = std::to_string(pMDToDApi->_file_id); + *filePath = pMDToDApi->_folder_path + "/" + converted + ".json"; + return 0; +} + +extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings, + size_t emmbeddingsSize) { + auto columnName = std::string(column); + MS_LOG(INFO) << "Start update " << columnName; + + std::string converted = std::to_string(pMDToDApi->_file_id); + std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin"; + DataHelper dh; + MS_LOG(INFO) << "Try to save file " << embedding_file_path; + std::vector bin_content(emmbeddings, emmbeddings + emmbeddingsSize); + Status rc = dh.template WriteBinFile(embedding_file_path, bin_content); + if (rc.IsError()) { + MS_LOG(ERROR) << "Fail to write embedding file: " << embedding_file_path << "."; + return -1; + } + MS_LOG(INFO) << "Saved file " << embedding_file_path; + + std::string file_path; + if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) { + MS_LOG(ERROR) << "Failed to update " << columnName; + return -1; + } + + MS_LOG(INFO) << "Updating json file: " << file_path; + rc = dh.UpdateValue(file_path, std::string(column), embedding_file_path); + if (rc.IsError()) { + MS_LOG(ERROR) << "Fail to update json: " << file_path << "."; + return -1; + } + return 0; +} + +extern "C" int MDToDApi_UpdateStringArray(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff) { + auto columnName = std::string(column); + std::string file_path; + if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) { + MS_LOG(ERROR) << "Failed to update " << columnName; + return -1; + } + MS_LOG(INFO) << "Start Update string array column: " << columnName << " in file " << file_path; + DataHelper dh; + std::vector strVec; + if (MDbuff.DataSize > 0) { + const char *p = reinterpret_cast(MDbuff.Buff); + do { + strVec.push_back(std::string(p)); + p += strVec.back().size() + 1; + } while (p < 
reinterpret_cast(MDbuff.Buff) + MDbuff.DataSize); + } + Status rc = dh.UpdateArray(file_path, columnName, strVec); + if (rc.IsError()) { + MS_LOG(ERROR) << "Fail to update json: " << file_path << "."; + return -1; + } + return 0; +} + +extern "C" int MDToDApi_UpdateFloatArray(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDBuff) { + auto columnName = std::string(column); + std::string file_path; + if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) { + MS_LOG(ERROR) << "Faile to updaet " << columnName; + return -1; + } + MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path; + DataHelper dh; + std::vector vec; + MDBuffToVector(MDBuff, &vec); + Status rc = dh.UpdateArray(file_path, columnName, vec); + if (rc.IsError()) { + MS_LOG(ERROR) << "Fail to update json: " << file_path << "."; + return -1; + } + return 0; +} + +extern "C" int MDToDApi_UpdateIsForTrain(MDToDApi *pMDToDApi, int32_t isForTrain) { + int64_t file_id = pMDToDApi->_file_id; + MS_LOG(INFO) << "Start Update isForTRain for id: " << file_id << " To " << isForTrain; + + if (file_id < 0) return -1; + std::string converted = std::to_string(pMDToDApi->_file_id); + std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; + DataHelper dh; + MS_LOG(INFO) << "Updating file: " << file_path; + Status rc = dh.UpdateValue(file_path, "_isForTrain", isForTrain, ""); + if (rc.IsError()) { + MS_LOG(ERROR) << "Fail to update json: " << file_path << "."; + return -1; + } + return 0; +} + +extern "C" int MDToDApi_UpdateNoOfFaces(MDToDApi *pMDToDApi, int32_t noOfFaces) { + int64_t file_id = pMDToDApi->_file_id; + MS_LOG(INFO) << "Start Update noOfFaces for id: " << file_id << " To " << noOfFaces; + + if (file_id < 0) return -1; + std::string converted = std::to_string(pMDToDApi->_file_id); + std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; + DataHelper dh; + MS_LOG(INFO) << "Updating file: " << file_path; + Status rc = 
dh.UpdateValue(file_path, "_noOfFaces", noOfFaces, ""); + if (rc.IsError()) { + MS_LOG(ERROR) << "Fail to update json: " << file_path << "."; + return -1; + } + return 0; +} diff --git a/mindspore/lite/minddata/wrapper/MDToDApi.h b/mindspore/lite/minddata/wrapper/MDToDApi.h new file mode 100644 index 00000000000..9f9bebeddda --- /dev/null +++ b/mindspore/lite/minddata/wrapper/MDToDApi.h @@ -0,0 +1,70 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef DATASET_MDTODAPI_H_ +#define DATASET_MDTODAPI_H_ + +#include +class MDToDApi; + +typedef struct MDToDBuff { + void *Buff; + size_t DataSize; + size_t TensorSize[4]; + size_t MaxBuffSize; +} MDToDBuff_t; + +typedef struct MDToDConf { + const char *pFolderPath; + const char *pSchemFile; + const char *pStoragePath; + MDToDBuff_t columnsToReadBuff; + float MEAN[3]; + float STD[3]; + int ResizeSizeWH[2]; + int CropSizeWH[2]; + int64_t fileid; // -1 All files, otherwise get a single specific file +} MDToDConf_t; + +typedef struct MDToDResult { + int64_t fileid; + int32_t isForTrain; + int32_t noOfFaces; + MDToDBuff_t fileNameBuff; + MDToDBuff_t labelBuff; + MDToDBuff_t imageBuff; + MDToDBuff_t embeddingBuff; + MDToDBuff_t boundingBoxesBuff; + MDToDBuff_t confidencesBuff; + MDToDBuff_t landmarksBuff; + MDToDBuff_t faceFileNamesBuff; + MDToDBuff_t imageQualitiesBuff; + MDToDBuff_t faceEmbeddingsBuff; +} MDToDResult_t; + +typedef int (*MDToDApi_pathTest_t)(const char *path); +typedef int (*MDToDApi_testAlbum_t)(); +typedef MDToDApi *(*MDToDApi_createPipeLine_t)(MDToDConf_t MDConf); +typedef int (*MDToDApi_GetNext_t)(MDToDApi *pMDToDApi, MDToDResult_t *results); +typedef int (*MDToDApi_UpdateEmbeding_t)(MDToDApi *pMDToDApi, const char *column, float *emmbeddings, + size_t emmbeddingsSize); +typedef int (*MDToDApi_UpdateStringArray_t)(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff); +typedef int (*MDToDApi_UpdateFloatArray_t)(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff); +typedef int (*MDToDApi_UpdateIsForTrain_t)(MDToDApi *pMDToDApi, int32_t isForTrain);  // must match the exported definition +typedef int (*MDToDApi_UpdateNoOfFaces_t)(MDToDApi *pMDToDApi, int32_t noOfFaces); +typedef int (*MDToDApi_Stop_t)(MDToDApi *pMDToDApi); +typedef int (*MDToDApi_Destroy_t)(MDToDApi *pMDToDApi); + +#endif  // DATASET_MDTODAPI_H_ diff --git a/mindspore/lite/minddata/example/jni-example.cc b/mindspore/lite/minddata/wrapper/jni-example.cc similarity index 100% rename from
mindspore/lite/minddata/example/jni-example.cc rename to mindspore/lite/minddata/wrapper/jni-example.cc diff --git a/mindspore/lite/minddata/example/testCifar10Data/data_batch_1.bin b/mindspore/lite/minddata/wrapper/testCifar10Data/data_batch_1.bin similarity index 100% rename from mindspore/lite/minddata/example/testCifar10Data/data_batch_1.bin rename to mindspore/lite/minddata/wrapper/testCifar10Data/data_batch_1.bin diff --git a/mindspore/lite/minddata/example/x86-example.cc b/mindspore/lite/minddata/wrapper/x86-example.cc similarity index 100% rename from mindspore/lite/minddata/example/x86-example.cc rename to mindspore/lite/minddata/wrapper/x86-example.cc diff --git a/mindspore/lite/test/ut/src/dataset/eager_test.cc b/mindspore/lite/test/ut/src/dataset/eager_test.cc index 612cfddc4d2..74cc8b177b2 100644 --- a/mindspore/lite/test/ut/src/dataset/eager_test.cc +++ b/mindspore/lite/test/ut/src/dataset/eager_test.cc @@ -27,10 +27,10 @@ using MSTensor = mindspore::tensor::MSTensor; using DETensor = mindspore::tensor::DETensor; -using mindspore::dataset::api::vision::Decode; -using mindspore::dataset::api::vision::Normalize; -using mindspore::dataset::api::vision::Resize; -using Execute = mindspore::dataset::api::Execute; +using mindspore::dataset::vision::Decode; +using mindspore::dataset::vision::Normalize; +using mindspore::dataset::vision::Resize; +using Execute = mindspore::dataset::Execute; using Path = mindspore::dataset::Path; class MindDataTestEager : public mindspore::CommonTest {