diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc
index 65c2cdde30b..65be9f1be25 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc
@@ -225,20 +225,21 @@ Status AlbumOp::WorkerEntry(int32_t worker_id) {
 // Only support JPEG/PNG/GIF/BMP
 // Optimization: Could take in a tensor
-Status AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
+// This function does not return Status because we want to skip bad input rather than fail the whole load
+bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
   std::ifstream file_handle;
   constexpr int read_num = 3;
   *valid = false;
   file_handle.open(file_name, std::ios::binary | std::ios::in);
   if (!file_handle.is_open()) {
-    RETURN_STATUS_UNEXPECTED("Invalid file, can not open image file: " + file_name);
+    return false;
   }
   unsigned char file_type[read_num];
   (void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);
   if (file_handle.fail()) {
     file_handle.close();
-    RETURN_STATUS_UNEXPECTED("Invalid data, failed to read image file: " + file_name);
+    return false;
   }
   file_handle.close();
   if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {
@@ -246,17 +247,8 @@ Status AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
     // JPEG with EXIF stats with \xff\xd8\xff\xe1
     // Use \xff\xd8\xff to cover both.
     *valid = true;
-  } else if (file_type[0] == 0x89 && file_type[1] == 0x50 && file_type[2] == 0x4e) {
-    // It's a PNG
-    *valid = true;
-  } else if (file_type[0] == 0x47 && file_type[1] == 0x49 && file_type[2] == 0x46) {
-    // It's a GIF
-    *valid = true;
-  } else if (file_type[0] == 0x42 && file_type[1] == 0x4d) {
-    // It's a BMP
-    *valid = true;
   }
-  return Status::OK();
+  return true;
 }
 
 Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) {
@@ -264,22 +256,44 @@ Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col
   std::ifstream fs;
   fs.open(image_file_path, std::ios::binary | std::ios::in);
   if (fs.fail()) {
-    MS_LOG(INFO) << "Image file not found:" << image_file_path << ".";
+    MS_LOG(WARNING) << "File not found: " << image_file_path << ".";
     // If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor
-    RETURN_STATUS_UNEXPECTED("Invalid file_path, failed to read file: " + image_file_path);
+    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row));
+    return Status::OK();
+  }
+  // Workaround: replace PNG images with an empty tensor, since only JPEG is decoded here
+  Path file(image_file_path);
+  std::set<std::string> png_ext = {".png", ".PNG"};
+  if (png_ext.find(file.Extension()) != png_ext.end()) {
+    // load empty tensor since image is not jpg
+    MS_LOG(INFO) << "PNG file found, loading empty tensor: " << image_file_path << ".";
+    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row));
+    return Status::OK();
+  }
+  // treat bin files separately
+  std::set<std::string> bin_ext = {".bin", ".BIN"};
+  if (bin_ext.find(file.Extension()) != bin_ext.end()) {
+    // load the bin file content as-is, without decoding
+    MS_LOG(INFO) << "Bin file found: " << image_file_path << ".";
+    RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
+    row->push_back(std::move(image));
+    return Status::OK();
+  }
-
-  MS_LOG(INFO) << "Image file found: " << image_file_path << ".";
   // check that the file is an image before decoding
   bool valid = false;
-  RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid));
+  bool check_success = CheckImageType(image_file_path, &valid);
+  if (!check_success || !valid) {
+    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row));
+    return Status::OK();
+  }
+  // if it is a jpeg image, load and try to decode
   RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
   if (decode_ && valid) {
     Status rc = Decode(image, &image);
     if (rc.IsError()) {
-      std::string err = "Invalid data, failed to decode image: " + image_file_path;
-      RETURN_STATUS_UNEXPECTED(err);
+      RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, row));
+      return Status::OK();
     }
   }
   row->push_back(std::move(image));
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h
index f3589099ed7..6d0c9604094 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h
@@ -189,8 +189,8 @@ class AlbumOp : public ParallelOp, public RandomAccessOp {
   /// \brief Check if image ia valid.Only support JPEG/PNG/GIF/BMP
   /// This function could be optimized to return the tensor to reduce open/closing files
-  /// \return Status - The error code returned
-  Status CheckImageType(const std::string &file_name, bool *valid);
+  /// \return bool - false if the file cannot be opened or read
+  bool CheckImageType(const std::string &file_name, bool *valid);
 
   // Base-class override for NodePass visitor acceptor.
   // @param p - Pointer to the NodePass to be accepted.
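With this change AlbumOp treats any file that does not carry the JPEG signature as "skip and load an empty tensor" rather than a hard error. The standalone sketch below mirrors that magic-byte check so the behaviour can be exercised outside the op; the helper name and the small command-line driver are illustrative only and are not part of the patch.

// Illustrative sketch only: mirrors the JPEG signature test kept in AlbumOp::CheckImageType.
// PNG/GIF/BMP signatures are no longer accepted; such files fall through to LoadEmptyTensor.
#include <cstdio>
#include <fstream>
#include <string>

bool LooksLikeJpeg(const std::string &file_name) {
  std::ifstream in(file_name, std::ios::binary | std::ios::in);
  unsigned char sig[3] = {0, 0, 0};
  if (!in.read(reinterpret_cast<char *>(sig), sizeof(sig))) {
    return false;  // unreadable files are treated like non-JPEG input: skipped, not fatal
  }
  // Plain JPEG starts with \xff\xd8\xff\xe0, EXIF JPEG with \xff\xd8\xff\xe1; \xff\xd8\xff covers both
  return sig[0] == 0xff && sig[1] == 0xd8 && sig[2] == 0xff;
}

int main(int argc, char **argv) {
  for (int i = 1; i < argc; ++i) {
    std::printf("%s -> %s\n", argv[i], LooksLikeJpeg(argv[i]) ? "jpeg" : "not jpeg (empty tensor)");
  }
  return 0;
}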
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc
index b6d50a955ff..f847e402306 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc
@@ -429,10 +429,11 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
     bool ret = Pad(lite_mat_rgb, lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right,
                    PaddBorderType::PADD_BORDER_CONSTANT, fill_r, fill_g, fill_b);
     CHECK_FAIL_RETURN_UNEXPECTED(ret, "Pad failed in lite cv");
-
+    // new shape for output tensor
+    TensorShape new_shape = TensorShape({lite_mat_pad.height_, lite_mat_pad.width_, input->shape()[2]});
     std::shared_ptr<Tensor> output_tensor;
-    RETURN_IF_NOT_OK(Tensor::CreateFromMemory(input->shape(), input->type(),
-                                              static_cast<uchar *>(lite_mat_pad.data_ptr_), &output_tensor));
+    RETURN_IF_NOT_OK(
+      Tensor::CreateFromMemory(new_shape, input->type(), static_cast<uchar *>(lite_mat_pad.data_ptr_), &output_tensor));
     *output = output_tensor;
   } catch (std::runtime_error &e) {
     RETURN_STATUS_UNEXPECTED("Error in image Pad.");
diff --git a/mindspore/lite/minddata/CMakeLists.txt b/mindspore/lite/minddata/CMakeLists.txt
index 7e312147b49..77d3bded7e7 100644
--- a/mindspore/lite/minddata/CMakeLists.txt
+++ b/mindspore/lite/minddata/CMakeLists.txt
@@ -175,7 +175,7 @@ if (BUILD_MINDDATA STREQUAL "full")
         "${MINDDATA_DIR}/kernels/image/cut_out_op.cc"
         "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc"
         "${MINDDATA_DIR}/kernels/image/equalize_op.cc"
-        "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc"
+        "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc"
         "${MINDDATA_DIR}/kernels/image/image_utils.cc"
         "${MINDDATA_DIR}/kernels/image/invert_op.cc"
         "${MINDDATA_DIR}/kernels/image/math_utils.cc"
@@ -237,9 +237,9 @@ if (BUILD_MINDDATA STREQUAL "full")
     )
     include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache")
 
-#    if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64))
-#        set(MINDDATA_EXAMPLE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/example/jni-example.cc)
-#    endif ()
+    if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64))
+        set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc)
+    endif ()
 
     add_library(minddata-lite SHARED
         ${MINDDATA_API_SRC_FILES}
diff --git a/mindspore/lite/minddata/wrapper/MDToDApi.cc b/mindspore/lite/minddata/wrapper/MDToDApi.cc
new file mode 100644
index 00000000000..b914568d7e5
--- /dev/null
+++ b/mindspore/lite/minddata/wrapper/MDToDApi.cc
@@ -0,0 +1,451 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "MDToDApi.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+#include "minddata/dataset/include/datasets.h"
+#include "minddata/dataset/include/execute.h"
+#include "minddata/dataset/util/path.h"
+#include "minddata/dataset/include/vision.h"
+#include "minddata/dataset/util/data_helper.h"
+#if defined(__ANDROID__) || defined(ANDROID)
+#include
+#include
+#endif
+
+using mindspore::dataset::Path;
+using mindspore::dataset::Tensor;
+
+using namespace mindspore::dataset;
+
+using mindspore::LogStream;
+using mindspore::MsLogLevel::DEBUG;
+using mindspore::MsLogLevel::ERROR;
+using mindspore::MsLogLevel::INFO;
+
+using mindspore::dataset::BorderType;
+using mindspore::dataset::InterpolationMode;
+using mindspore::dataset::Status;
+
+class MDToDApi {
+ public:
+  std::shared_ptr<Dataset> _ds;
+  std::shared_ptr<Iterator> _iter;
+  std::vector<std::shared_ptr<TensorOperation>> _augs;
+  std::string _storage_folder;
+  std::string _folder_path;
+  bool _hasBatch;
+  int64_t _file_id;
+
+  MDToDApi() : _ds(nullptr), _iter(nullptr), _augs({}), _storage_folder(""), _hasBatch(false), _file_id(-1) {
+    MS_LOG(WARNING) << "MDToDAPI Call constructor";
+  }
+  ~MDToDApi() {
+    MS_LOG(WARNING) << "MDToDAPI Call destructor";
+    _augs.clear();
+    _ds = nullptr;
+    _iter = nullptr;
+  }
+};
+
+std::vector<std::string> MDToDBuffToVector(MDToDBuff_t StrBuff) {
+  std::vector<std::string> strVector;
+  if (StrBuff.DataSize > 0) {
+    const char *p = reinterpret_cast<const char *>(StrBuff.Buff);
+    do {
+      strVector.push_back(std::string(p));
+      p += strVector.back().size() + 1;
+    } while (p < reinterpret_cast<const char *>(StrBuff.Buff) + StrBuff.DataSize);
+  }
+  return strVector;
+}
+
+extern "C" int MDToDApi_pathTest(const char *path) {
+  Path f(path);
+  MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath();
+  // Print out the first few items in the directory
+  auto dir_it = Path::DirIterator::OpenDirectory(&f);
+  MS_LOG(WARNING) << dir_it.get();
+  int i = 0;
+  while (dir_it->hasNext()) {
+    Path v = dir_it->next();
+    MS_LOG(WARNING) << v.toString() << "\n";
+    i++;
+    if (i > 5) break;
+  }
+  return 0;
+}
+
+extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) {
+  MS_LOG(WARNING) << "Start createPipeLine";
+  std::string folder_path(MDConf.pFolderPath);
+  std::string schema_file(MDConf.pSchemFile);
+  std::vector<std::string> column_names = MDToDBuffToVector(MDConf.columnsToReadBuff);
+  if (std::find(column_names.begin(), column_names.end(), "id") == column_names.end()) {
+    MS_LOG(WARNING) << "Column id not found, adding it";
+    column_names.push_back("id");
+  }
+  std::vector<std::shared_ptr<TensorOperation>> mapOperations;
+  if (std::find(column_names.begin(), column_names.end(), "image") != column_names.end()) {
+    MS_LOG(WARNING) << "Found column image, create map with:";
+    MS_LOG(WARNING) << "resize: { " << MDConf.ResizeSizeWH[0] << ", " << MDConf.ResizeSizeWH[1] << " }";
+    MS_LOG(WARNING) << "crop: { " << MDConf.CropSizeWH[0] << ", " << MDConf.CropSizeWH[1] << " }";
+    MS_LOG(WARNING) << "MEAN: { " << MDConf.MEAN[0] << ", " << MDConf.MEAN[1] << ", " << MDConf.MEAN[2] << " }";
+    MS_LOG(WARNING) << "STD: { " << MDConf.STD[0] << ", " << MDConf.STD[1] << ", " << MDConf.STD[2] << " }";
+
+    if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) {
+      std::vector<int32_t> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2);
+      std::shared_ptr<TensorOperation> resize_op = vision::Resize(Resize);
+      assert(resize_op != nullptr);
+      MS_LOG(WARNING) << "Push back resize";
+      mapOperations.push_back(resize_op);
+    }
+    if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) {
+      std::vector<int32_t> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2);
+      std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop(Crop);
+      assert(center_crop_op != nullptr);
+      MS_LOG(WARNING) << "Push back crop";
+      mapOperations.push_back(center_crop_op);
+    }
+  }
+  std::shared_ptr<Dataset> ds = nullptr;
+  MS_LOG(INFO) << "Read id = " << MDConf.fileid << " (-1 for all)";
+  if (MDConf.fileid > -1) {
+    // read specific image using SequentialSampler
+    ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(MDConf.fileid, 1L));
+  } else {
+    // read all images sequentially
+    ds = Album(folder_path, schema_file, column_names, true, SequentialSampler());
+  }
+  ds = ds->SetNumWorkers(1);
+
+  assert(ds != nullptr);
+
+  // Create a Repeat operation on ds
+  int32_t repeat_num = 1;
+  ds = ds->Repeat(repeat_num);
+  assert(ds != nullptr);
+
+  // Create objects for the tensor ops
+  MS_LOG(INFO) << "Create pipeline parameters";
+  MS_LOG(INFO) << "folder path: " << folder_path << ", schema json: " << schema_file;
+  MS_LOG(INFO) << "Reading columns:";
+  for (auto str : column_names) {
+    MS_LOG(INFO) << str << " ";
+  }
+  bool hasBatch = false;
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  if (nullptr == iter) {
+    MS_LOG(ERROR) << "Iterator creation failed";
+    return nullptr;
+  }
+  assert(iter != nullptr);
+  MDToDApi *pMDToDApi = new MDToDApi;
+  pMDToDApi->_ds = ds;
+  pMDToDApi->_iter = iter;
+  pMDToDApi->_augs = mapOperations;
+  pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath);
+  pMDToDApi->_folder_path = folder_path;
+  pMDToDApi->_hasBatch = hasBatch;
+  return pMDToDApi;
+}
+
+template <typename T>
+void MDBuffToVector(MDToDBuff_t MDBuff, std::vector<T> *vec) {
+  vec->clear();
+  if (MDBuff.DataSize > 0) {
+    int nofElements = MDBuff.DataSize / sizeof(T);
+    vec->assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements);
+  }
+}
+
+template <typename T>
+void GetValue(std::unordered_map<std::string, std::shared_ptr<Tensor>> row, std::string columnName, T *o) {
+  auto column = row[columnName];
+  if (NULL != column) {
+    MS_LOG(INFO) << "Tensor " << columnName << " shape: " << column->shape() << " type: " << column->type()
+                 << " bytes: " << column->SizeInBytes();
+    column->GetItemAt(o, {});
+    MS_LOG(INFO) << columnName << ": " << +*o;
+  } else {
+    MS_LOG(INFO) << "Tensor " << columnName << " Not found"
+                 << ".";
+    *o = 0;
+  }
+}
+
+void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> row, std::string columnName,
+                     bool hasBatch, MDToDBuff_t *resBuff) {
+  auto column = row[columnName];
+  resBuff->TensorSize[0] = resBuff->TensorSize[1] = resBuff->TensorSize[2] = resBuff->TensorSize[3] =
+    0;  // Mark all dims do not exist in tensor
+  int firstDim = (hasBatch) ? 1 : 0;
+  if (NULL != column) {
+    MS_LOG(INFO) << "Tensor " << columnName << " shape: " << column->shape() << " type: " << column->type()
+                 << " bytes: " << column->SizeInBytes() << " nof elements: " << column->shape()[firstDim];
+    auto tensorShape = column->shape().AsVector();
+    for (int ix = 0; ix < tensorShape.size(); ix++) {
+      MS_LOG(INFO) << "Tensor " << columnName << " shape[" << ix << "] = " << tensorShape[ix];
+      resBuff->TensorSize[ix] = tensorShape[ix];
+    }
+    if (!hasBatch) {
+      for (int ix = 3; ix > 0; ix--) {
+        resBuff->TensorSize[ix] = resBuff->TensorSize[ix - 1];
+      }
+      resBuff->TensorSize[0] = 1;
+    }
+    if (column->shape()[firstDim] > 0) {
+      if (DataType::DE_STRING == column->type()) {
+        std::string str;
+        for (int ix = 0; ix < column->shape()[firstDim]; ix++) {
+          std::string_view strView;
+          if (hasBatch) {
+            column->GetItemAt(&strView, {0, ix});
+          } else {
+            column->GetItemAt(&strView, {ix});
+          }
+          MS_LOG(INFO) << "string " << columnName << "[" << ix << "]:" << strView << " (size: " << strView.size()
+                       << ")";
+          str.append(strView);
+          str.push_back('\0');
+        }
+        resBuff->DataSize = str.size();
+        errno_t ret = memcpy_s(resBuff->Buff, resBuff->MaxBuffSize, str.data(), resBuff->DataSize);
+        if (ret != 0) {
+          resBuff->DataSize = 0;  // memcpy failed, so the amount of data copied is 0
+          MS_LOG(ERROR) << "memcpy_s return: " << ret;
+        }
+      } else {
+        DataHelper dh;
+        resBuff->DataSize =
+          dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize);
+      }
+      MS_LOG(INFO) << columnName << " " << resBuff->DataSize
+                   << " bytes copied to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") ";
+      if (0 == resBuff->DataSize) {
+        MS_LOG(ERROR) << "Copy failed, " << columnName << " is too large"
+                      << ".";  // memcpy failed
+      }
+    } else {
+      MS_LOG(INFO) << "Tensor " << columnName << " is empty (has size 0)";
+    }
+  } else {
+    MS_LOG(INFO) << "Tensor " << columnName << " was not read.";
+  }
+}
+
+extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
+  MS_LOG(INFO) << "Start GetNext";
+  if (pMDToDApi == nullptr) {
+    MS_LOG(ERROR) << "GetNext called with nullptr. Abort";
+    assert(pMDToDApi != nullptr);
+  }
+
+  // Set defaults
+  results->fileid = -1;
+  results->embeddingBuff.DataSize = 0;
+  results->imageBuff.DataSize = 0;
+  MS_LOG(INFO) << "Start GetNext [1] " << pMDToDApi;
+  // get next row for dataset
+  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
+  if (pMDToDApi->_iter == nullptr) {
+    MS_LOG(ERROR) << "GetNext called with no iterator. Abort";
+    return -1;
+  }
+  // create Execute functions, this replaces Map in Pipeline
+  pMDToDApi->_iter->GetNextRow(&row);
+  if (row.size() != 0) {
+    if ((pMDToDApi->_augs).size() > 0) {
+      // String and Tensors
+      GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff);
+      // for each operation, run eager mode, single threaded operation, will have to memcpy
+      // regardless
+      for (int i = 0; i < (pMDToDApi->_augs).size(); i++) {
+        // each Execute call will invoke a memcpy, this cannot really be optimized further
+        // for this use case, std::move is added as a fail-safe.
+        row["image"] = Execute((pMDToDApi->_augs)[i])(std::move(row["image"]));
+        if (row["image"] == nullptr) {
+          // nullptr means that the eager mode image processing failed, we fail in this case
+          return -1;
+        }
+      }
+    }
+    // FILE ID
+    GetValue(row, "id", &results->fileid);
+    pMDToDApi->_file_id = results->fileid;  // hold current file id to enable embeddings update (no itr->getCurrent)
+    // IS FOR TRAIN
+    GetValue(row, "_isForTrain", &results->isForTrain);
+    GetValue(row, "_noOfFaces", &results->noOfFaces);
+    // String and Tensors
+    GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff);
+    GetTensorToBuff(row, "image", pMDToDApi->_hasBatch, &results->imageBuff);
+    GetTensorToBuff(row, "_embedding", pMDToDApi->_hasBatch, &results->embeddingBuff);
+    GetTensorToBuff(row, "label", pMDToDApi->_hasBatch, &results->labelBuff);
+    GetTensorToBuff(row, "_boundingBoxes", pMDToDApi->_hasBatch, &results->boundingBoxesBuff);
+    GetTensorToBuff(row, "_confidences", pMDToDApi->_hasBatch, &results->confidencesBuff);
+    GetTensorToBuff(row, "_landmarks", pMDToDApi->_hasBatch, &results->landmarksBuff);
+    GetTensorToBuff(row, "_faceFileNames", pMDToDApi->_hasBatch, &results->faceFileNamesBuff);
+    GetTensorToBuff(row, "_imageQualities", pMDToDApi->_hasBatch, &results->imageQualitiesBuff);
+    GetTensorToBuff(row, "_faceEmbeddings", pMDToDApi->_hasBatch, &results->faceEmbeddingsBuff);
+    return 0;
+  }
+  return -1;
+}
+
+extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) {
+  // Manually terminate the pipeline
+  pMDToDApi->_iter->Stop();
+  MS_LOG(WARNING) << "pipeline stopped";
+  return 0;
+}
+
+extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) {
+  MS_LOG(WARNING) << "pipeline deleted start";
+  pMDToDApi->_iter->Stop();
+  delete pMDToDApi;
+  MS_LOG(WARNING) << "pipeline deleted end";
+  return 0;
+}
+
+int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) {
+  int64_t file_id = pMDToDApi->_file_id;
+  if (file_id < 0) {
+    MS_LOG(ERROR) << "Illegal file ID to update: " << file_id << ".";
+    return -1;
+  }
+  std::string converted = std::to_string(pMDToDApi->_file_id);
+  *filePath = pMDToDApi->_folder_path + "/" + converted + ".json";
+  return 0;
+}
+
+extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings,
+                                       size_t emmbeddingsSize) {
+  auto columnName = std::string(column);
+  MS_LOG(INFO) << "Start update " << columnName;
+
+  std::string converted = std::to_string(pMDToDApi->_file_id);
+  std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin";
+  DataHelper dh;
+  MS_LOG(INFO) << "Try to save file " << embedding_file_path;
+  std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize);
+  Status rc = dh.template WriteBinFile<float>(embedding_file_path, bin_content);
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Failed to write embedding file: " << embedding_file_path << ".";
+    return -1;
+  }
+  MS_LOG(INFO) << "Saved file " << embedding_file_path;
+
+  std::string file_path;
+  if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) {
+    MS_LOG(ERROR) << "Failed to update " << columnName;
+    return -1;
+  }
+
+  MS_LOG(INFO) << "Updating json file: " << file_path;
+  rc = dh.UpdateValue(file_path, std::string(column), embedding_file_path);
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Failed to update json: " << file_path << ".";
+    return -1;
+  }
+  return 0;
+}
+
+extern "C" int MDToDApi_UpdateStringArray(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff) {
+  auto columnName = std::string(column);
+  std::string file_path;
+  if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) {
+    MS_LOG(ERROR) << "Failed to update " << columnName;
+    return -1;
+  }
+  MS_LOG(INFO) << "Start Update string array column: " << columnName << " in file " << file_path;
+  DataHelper dh;
+  std::vector<std::string> strVec;
+  if (MDbuff.DataSize > 0) {
+    const char *p = reinterpret_cast<const char *>(MDbuff.Buff);
+    do {
+      strVec.push_back(std::string(p));
+      p += strVec.back().size() + 1;
+    } while (p < reinterpret_cast<const char *>(MDbuff.Buff) + MDbuff.DataSize);
+  }
+  Status rc = dh.UpdateArray(file_path, columnName, strVec);
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Failed to update json: " << file_path << ".";
+    return -1;
+  }
+  return 0;
+}
+
+extern "C" int MDToDApi_UpdateFloatArray(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDBuff) {
+  auto columnName = std::string(column);
+  std::string file_path;
+  if (0 != GetJsonFullFileName(pMDToDApi, &file_path)) {
+    MS_LOG(ERROR) << "Failed to update " << columnName;
+    return -1;
+  }
+  MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path;
+  DataHelper dh;
+  std::vector<float> vec;
+  MDBuffToVector(MDBuff, &vec);
+  Status rc = dh.UpdateArray(file_path, columnName, vec);
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Failed to update json: " << file_path << ".";
+    return -1;
+  }
+  return 0;
+}
+
+extern "C" int MDToDApi_UpdateIsForTrain(MDToDApi *pMDToDApi, int32_t isForTrain) {
+  int64_t file_id = pMDToDApi->_file_id;
+  MS_LOG(INFO) << "Start Update isForTrain for id: " << file_id << " To " << isForTrain;
+
+  if (file_id < 0) return -1;
+  std::string converted = std::to_string(pMDToDApi->_file_id);
+  std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json";
+  DataHelper dh;
+  MS_LOG(INFO) << "Updating file: " << file_path;
+  Status rc = dh.UpdateValue(file_path, "_isForTrain", isForTrain, "");
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Failed to update json: " << file_path << ".";
+    return -1;
+  }
+  return 0;
+}
+
+extern "C" int MDToDApi_UpdateNoOfFaces(MDToDApi *pMDToDApi, int32_t noOfFaces) {
+  int64_t file_id = pMDToDApi->_file_id;
+  MS_LOG(INFO) << "Start Update noOfFaces for id: " << file_id << " To " << noOfFaces;
+
+  if (file_id < 0) return -1;
+  std::string converted = std::to_string(pMDToDApi->_file_id);
+  std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json";
+  DataHelper dh;
+  MS_LOG(INFO) << "Updating file: " << file_path;
+  Status rc = dh.UpdateValue(file_path, "_noOfFaces", noOfFaces, "");
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Failed to update json: " << file_path << ".";
+    return -1;
+  }
+  return 0;
+}
diff --git a/mindspore/lite/minddata/wrapper/MDToDApi.h b/mindspore/lite/minddata/wrapper/MDToDApi.h
new file mode 100644
index 00000000000..9f9bebeddda
--- /dev/null
+++ b/mindspore/lite/minddata/wrapper/MDToDApi.h
@@ -0,0 +1,70 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_MDTODAPI_H_
+#define DATASET_MDTODAPI_H_
+
+#include <cstddef>
+#include <cstdint>
+
+class MDToDApi;
+
+typedef struct MDToDBuff {
+  void *Buff;
+  size_t DataSize;
+  size_t TensorSize[4];
+  size_t MaxBuffSize;
+} MDToDBuff_t;
+
+typedef struct MDToDConf {
+  const char *pFolderPath;
+  const char *pSchemFile;
+  const char *pStoragePath;
+  MDToDBuff_t columnsToReadBuff;
+  float MEAN[3];
+  float STD[3];
+  int ResizeSizeWH[2];
+  int CropSizeWH[2];
+  int64_t fileid;  // -1 All files, otherwise get a single specific file
+} MDToDConf_t;
+
+typedef struct MDToDResult {
+  int64_t fileid;
+  int32_t isForTrain;
+  int32_t noOfFaces;
+  MDToDBuff_t fileNameBuff;
+  MDToDBuff_t labelBuff;
+  MDToDBuff_t imageBuff;
+  MDToDBuff_t embeddingBuff;
+  MDToDBuff_t boundingBoxesBuff;
+  MDToDBuff_t confidencesBuff;
+  MDToDBuff_t landmarksBuff;
+  MDToDBuff_t faceFileNamesBuff;
+  MDToDBuff_t imageQualitiesBuff;
+  MDToDBuff_t faceEmbeddingsBuff;
+} MDToDResult_t;
+
+typedef int (*MDToDApi_pathTest_t)(const char *path);
+typedef int (*MDToDApi_testAlbum_t)();
+typedef MDToDApi *(*MDToDApi_createPipeLine_t)(MDToDConf_t MDConf);
+typedef int (*MDToDApi_GetNext_t)(MDToDApi *pMDToDApi, MDToDResult_t *results);
+typedef int (*MDToDApi_UpdateEmbeding_t)(MDToDApi *pMDToDApi, const char *column, float *emmbeddings,
+                                         size_t emmbeddingsSize);
+typedef int (*MDToDApi_UpdateStringArray_t)(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff);
+typedef int (*MDToDApi_UpdateFloatArray_t)(MDToDApi *pMDToDApi, const char *column, MDToDBuff_t MDbuff);
+typedef int (*MDToDApi_UpdateIsForTrain_t)(MDToDApi *pMDToDApi, int32_t isForTrain);
+typedef int (*MDToDApi_UpdateNoOfFaces_t)(MDToDApi *pMDToDApi, int32_t noOfFaces);
+typedef int (*MDToDApi_Stop_t)(MDToDApi *pMDToDApi);
+typedef int (*MDToDApi_Destroy_t)(MDToDApi *pMDToDApi);
+
+#endif
diff --git a/mindspore/lite/minddata/example/jni-example.cc b/mindspore/lite/minddata/wrapper/jni-example.cc
similarity index 100%
rename from mindspore/lite/minddata/example/jni-example.cc
rename to mindspore/lite/minddata/wrapper/jni-example.cc
diff --git a/mindspore/lite/minddata/example/testCifar10Data/data_batch_1.bin b/mindspore/lite/minddata/wrapper/testCifar10Data/data_batch_1.bin
similarity index 100%
rename from mindspore/lite/minddata/example/testCifar10Data/data_batch_1.bin
rename to mindspore/lite/minddata/wrapper/testCifar10Data/data_batch_1.bin
diff --git a/mindspore/lite/minddata/example/x86-example.cc b/mindspore/lite/minddata/wrapper/x86-example.cc
similarity index 100%
rename from mindspore/lite/minddata/example/x86-example.cc
rename to mindspore/lite/minddata/wrapper/x86-example.cc
diff --git a/mindspore/lite/test/ut/src/dataset/eager_test.cc b/mindspore/lite/test/ut/src/dataset/eager_test.cc
index 612cfddc4d2..74cc8b177b2 100644
--- a/mindspore/lite/test/ut/src/dataset/eager_test.cc
+++ b/mindspore/lite/test/ut/src/dataset/eager_test.cc
@@ -27,10 +27,10 @@ using MSTensor = mindspore::tensor::MSTensor;
 using DETensor = mindspore::tensor::DETensor;
 
-using mindspore::dataset::api::vision::Decode;
-using mindspore::dataset::api::vision::Normalize;
-using mindspore::dataset::api::vision::Resize;
-using Execute = mindspore::dataset::api::Execute;
+using mindspore::dataset::vision::Decode;
+using mindspore::dataset::vision::Normalize;
+using mindspore::dataset::vision::Resize;
+using Execute = mindspore::dataset::Execute;
 using Path = mindspore::dataset::Path;
 
 class MindDataTestEager : public mindspore::CommonTest {
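The wrapper above exposes a plain C ABI, so a train-on-device client is expected to dlopen the minddata-lite shared library and resolve the MDToDApi_* symbols through the function-pointer typedefs declared in MDToDApi.h. No such client ships with this patch; the sketch below is only an illustrative caller, and the dataset paths, buffer sizes, and error handling are assumptions rather than part of the change.

// Hypothetical consumer of the MDToDApi C interface (not part of this patch).
// Assumes libminddata-lite.so (built by mindspore/lite/minddata/CMakeLists.txt) is on the loader path.
#include <dlfcn.h>
#include <cstdio>
#include <vector>
#include "MDToDApi.h"

int main() {
  void *handle = dlopen("libminddata-lite.so", RTLD_NOW);
  if (handle == nullptr) {
    std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  auto create = reinterpret_cast<MDToDApi_createPipeLine_t>(dlsym(handle, "MDToDApi_createPipeLine"));
  auto get_next = reinterpret_cast<MDToDApi_GetNext_t>(dlsym(handle, "MDToDApi_GetNext"));
  auto destroy = reinterpret_cast<MDToDApi_Destroy_t>(dlsym(handle, "MDToDApi_Destroy"));
  if (create == nullptr || get_next == nullptr || destroy == nullptr) {
    std::fprintf(stderr, "dlsym failed: %s\n", dlerror());
    return 1;
  }

  // Column names travel as one buffer of NUL-separated strings, matching MDToDBuffToVector in MDToDApi.cc.
  const char columns[] = "image\0id";
  MDToDConf_t conf = {};
  conf.pFolderPath = "/data/album";                    // assumed dataset location
  conf.pSchemFile = "/data/album/datasetSchema.json";  // assumed schema file
  conf.pStoragePath = "/data/storage";
  conf.columnsToReadBuff = {const_cast<char *>(columns), sizeof(columns), {0, 0, 0, 0}, sizeof(columns)};
  conf.ResizeSizeWH[0] = conf.ResizeSizeWH[1] = 224;   // 0 would disable the resize op
  conf.CropSizeWH[0] = conf.CropSizeWH[1] = 0;         // 0 disables the center-crop op
  conf.fileid = -1;                                    // -1 iterates over all files

  MDToDApi *api = create(conf);
  if (api == nullptr) return 1;

  // The caller owns all output buffers; only the image buffer is populated in this sketch.
  std::vector<unsigned char> image(4 * 1024 * 1024);
  MDToDResult_t result = {};
  result.imageBuff = {image.data(), 0, {0, 0, 0, 0}, image.size()};
  while (get_next(api, &result) == 0) {
    std::printf("id=%lld image bytes=%zu\n", static_cast<long long>(result.fileid), result.imageBuff.DataSize);
  }
  destroy(api);
  dlclose(handle);
  return 0;
}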