add wrapper code for jni

This commit is contained in:
xulei2020 2020-11-26 15:50:46 +08:00
parent 40222f59a7
commit 406c586374
7 changed files with 888 additions and 193 deletions

View File

@ -49,7 +49,7 @@ usage()
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on" echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
echo " -D Enable dumping of function graph ir, default on" echo " -D Enable dumping of function graph ir, default on"
echo " -z Compile dataset & mindrecord, default on" echo " -z Compile dataset & mindrecord, default on"
echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv mode in lite predict" echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv, wrapper mode in lite predict"
echo " -M Enable MPI and NCCL for GPU training, gpu default on" echo " -M Enable MPI and NCCL for GPU training, gpu default on"
echo " -V Specify the minimum required cuda version, default CUDA 10.1" echo " -V Specify the minimum required cuda version, default CUDA 10.1"
echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation" echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation"
@ -129,7 +129,7 @@ checkopts()
DEBUG_MODE="on" DEBUG_MODE="on"
;; ;;
n) n)
if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" ]]; then if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" || "X$OPTARG" == "Xwrapper" ]]; then
COMPILE_MINDDATA_LITE="$OPTARG" COMPILE_MINDDATA_LITE="$OPTARG"
else else
echo "Invalid value ${OPTARG} for option -n" echo "Invalid value ${OPTARG} for option -n"
@ -678,7 +678,7 @@ build_lite()
build_gtest build_gtest
fi fi
if [ "${COMPILE_MINDDATA_LITE}" == "lite" ] || [ "${COMPILE_MINDDATA_LITE}" == "full" ]; then if [[ "${COMPILE_MINDDATA_LITE}" == "lite" || "${COMPILE_MINDDATA_LITE}" == "full" || "${COMPILE_MINDDATA_LITE}" == "wrapper" ]]; then
build_minddata_lite_deps build_minddata_lite_deps
fi fi

View File

@ -20,7 +20,7 @@ set(OPENCV_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/minddata/third_part
set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf) set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf)
set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers) set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers)
if (BUILD_MINDDATA STREQUAL "full") if (BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
if (PLATFORM_ARM64) if (PLATFORM_ARM64)
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${COMPONENT_NAME}) install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${COMPONENT_NAME})

View File

@ -28,7 +28,7 @@ set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv or full") set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full")
set(BUILD_LITE "on") set(BUILD_LITE "on")
set(PLATFORM_ARM "off") set(PLATFORM_ARM "off")
if (PLATFORM_ARM64 OR PLATFORM_ARM32) if (PLATFORM_ARM64 OR PLATFORM_ARM32)
@ -182,7 +182,7 @@ if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
endif () endif ()
endif () endif ()
if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full") if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
# add sentencepiece dependency # add sentencepiece dependency
# include(${TOP_DIR}/cmake/external_libs/sentencepiece.cmake) # include(${TOP_DIR}/cmake/external_libs/sentencepiece.cmake)
# json # json

View File

@ -81,6 +81,12 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/util MINDDATA_UTIL_SRC_FILES)
AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES) AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES)
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
if (BUILD_MINDDATA STREQUAL "full")
set(BUILD_MINDDATA "wrapper")
endif ()
endif ()
if (BUILD_MINDDATA STREQUAL "full") if (BUILD_MINDDATA STREQUAL "full")
include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image") include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image")
list(REMOVE_ITEM MINDDATA_API_SRC_FILES list(REMOVE_ITEM MINDDATA_API_SRC_FILES
@ -293,11 +299,73 @@ if (BUILD_MINDDATA STREQUAL "full")
if (PLATFORM_ARM32 OR PLATFORM_ARM64) if (PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(minddata-lite log) target_link_libraries(minddata-lite log)
elseif (BUILD_MINDDATA_EXAMPLE) elseif (BUILD_MINDDATA_EXAMPLE)
# add_executable(mdlite-example ${CMAKE_CURRENT_SOURCE_DIR}/example/x86-example.cc) endif()
# target_link_libraries(mdlite-example minddata-lite) elseif (BUILD_MINDDATA STREQUAL "wrapper")
# add_custom_command(TARGET mdlite-example POST_BUILD include_directories("${MINDDATA_DIR}/kernels/image")
# COMMAND cp -rf ${CMAKE_CURRENT_SOURCE_DIR}/example/testCifar10Data ${CMAKE_BINARY_DIR}/minddata include_directories("${MINDDATA_DIR}/util")
# ) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/wrapper)
set(MINDDATA_TODAPI_SRC
${MINDDATA_DIR}/core/tensor_shape.cc
${MINDDATA_DIR}/core/tensor.cc
${MINDDATA_DIR}/core/config_manager.cc
${MINDDATA_DIR}/core/data_type.cc
${MINDDATA_DIR}/core/tensor_helpers.cc
${MINDDATA_DIR}/core/global_context.cc
${MINDDATA_DIR}/core/tensor_row.cc
${MINDDATA_DIR}/api/vision.cc
${MINDDATA_DIR}/api/execute.cc
${MINDDATA_DIR}/api/transforms.cc
${MINDDATA_DIR}/api/de_tensor.cc
${MINDDATA_DIR}/util/path.cc
${MINDDATA_DIR}/util/status.cc
${MINDDATA_DIR}/util/data_helper.cc
${MINDDATA_DIR}/util/memory_pool.cc
${MINDDATA_DIR}/engine/data_schema.cc
${MINDDATA_DIR}/kernels/tensor_op.cc
${MINDDATA_DIR}/kernels/image/lite_image_utils.cc
${MINDDATA_DIR}/kernels/image/center_crop_op.cc
${MINDDATA_DIR}/kernels/image/crop_op.cc
${MINDDATA_DIR}/kernels/image/normalize_op.cc
${MINDDATA_DIR}/kernels/image/resize_op.cc
${MINDDATA_DIR}/kernels/data/compose_op.cc
${MINDDATA_DIR}/kernels/data/duplicate_op.cc
${MINDDATA_DIR}/kernels/data/one_hot_op.cc
${MINDDATA_DIR}/kernels/data/random_apply_op.cc
${MINDDATA_DIR}/kernels/data/random_choice_op.cc
${MINDDATA_DIR}/kernels/data/type_cast_op.cc
${MINDDATA_DIR}/kernels/data/data_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc
${CMAKE_CURRENT_SOURCE_DIR}/wrapper/album_op_android.cc
)
add_library(minddata-lite SHARED
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
${CORE_DIR}/utils/ms_utils.cc
${MINDDATA_TODAPI_SRC}
)
find_package(Threads REQUIRED)
target_link_libraries(minddata-lite
securec
jpeg-turbo
jpeg
mindspore::json
Threads::Threads
)
# 32-bit ARM builds link the NDK's clang_rt builtins archive explicitly.
# ref: https://github.com/android/ndk/issues/1202
if (PLATFORM_ARM32)
    # Search the whole NDK tree pointed to by the ANDROID_NDK environment variable.
    file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a)
    if (LIBCLANG_RT_LIB STREQUAL "")
        # Stop configuration early; the message names the exact file the glob looked for.
        MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-android.a in $ENV{ANDROID_NDK}")
    endif()
    target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB})
endif()
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(minddata-lite log)
elseif (BUILD_MINDDATA_EXAMPLE)
endif() endif()
elseif (BUILD_MINDDATA STREQUAL "lite") elseif (BUILD_MINDDATA STREQUAL "lite")
list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES "${MINDDATA_DIR}/core/client.cc") list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES "${MINDDATA_DIR}/core/client.cc")
@ -374,9 +442,6 @@ elseif (BUILD_MINDDATA STREQUAL "lite")
securec securec
jpeg-turbo jpeg-turbo
jpeg jpeg
# opencv_core
# opencv_imgcodecs
# opencv_imgproc
mindspore::json mindspore::json
) )

View File

@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#include "MDToDApi.h" #include "MDToDApi.h" //NOLINT
#include <string> #include <string>
#include <fstream> #include <fstream>
@ -22,7 +22,8 @@
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "minddata/dataset/include/datasets.h"
#include "album_op_android.h" //NOLINT
#include "minddata/dataset/include/execute.h" #include "minddata/dataset/include/execute.h"
#include "minddata/dataset/util/path.h" #include "minddata/dataset/util/path.h"
#include "minddata/dataset/include/vision.h" #include "minddata/dataset/include/vision.h"
@ -35,7 +36,7 @@
using mindspore::dataset::Path; using mindspore::dataset::Path;
using mindspore::dataset::Tensor; using mindspore::dataset::Tensor;
using mindspore::dataset; using TensorOperation = mindspore::dataset::TensorOperation;
using mindspore::LogStream; using mindspore::LogStream;
using mindspore::MsLogLevel::DEBUG; using mindspore::MsLogLevel::DEBUG;
@ -48,22 +49,21 @@ using mindspore::dataset::Status;
class MDToDApi { class MDToDApi {
public: public:
std::shared_ptr<Dataset> _ds; std::shared_ptr<mindspore::dataset::AlbumOp> _iter;
std::shared_ptr<Iterator> _iter;
std::vector<std::shared_ptr<TensorOperation>> _augs; std::vector<std::shared_ptr<TensorOperation>> _augs;
std::string _storage_folder; std::string _storage_folder;
std::string _folder_path; std::string _folder_path;
bool _hasBatch; bool _hasBatch;
int64_t _file_id; int64_t _file_id;
MDToDApi() : _ds(nullptr), _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) { public:
MS_LOG(WARNING) << "MDToDAPI Call constructor"; MDToDApi() : _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) {
MS_LOG(WARNING) << "MDToDAPI Call constractor";
} }
~MDToDApi() { ~MDToDApi() {
MS_LOG(WARNING) << "MDToDAPI Call destructor"; MS_LOG(WARNING) << "MDToDAPI Call destractor";
// derefernce dataset and iterator
_augs.clear(); _augs.clear();
_ds = nullptr;
_iter = nullptr;
} }
}; };
@ -79,7 +79,9 @@ std::vector<std::string> MDToDBuffToVector(MDToDBuff_t StrBuff) {
return strVector; return strVector;
} }
extern "C" int MDToDApi_pathTest(const char *path) { extern "C"
int MDToDApi_pathTest(const char* path) {
Path f(path); Path f(path);
MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath(); MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath();
// Print out the first few items in the directory // Print out the first few items in the directory
@ -114,36 +116,31 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) {
if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) { if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) {
std::vector<int> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2); std::vector<int> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2);
std::shared_ptr<TensorOperation> resize_op = vision::Resize(Resize); std::shared_ptr<TensorOperation> resize_op = mindspore::dataset::vision::Resize(Resize);
assert(resize_op != nullptr); assert(resize_op != nullptr);
MS_LOG(WARNING) << "Push back resize"; MS_LOG(WARNING) << "Push back resize";
mapOperations.push_back(resize_op); mapOperations.push_back(resize_op);
// hasBatch = true; Batch not currently supported inMInddata-Lite
} }
if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) { if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) {
std::vector<int> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2); std::vector<int> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2);
std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop(Crop); std::shared_ptr<TensorOperation> center_crop_op = mindspore::dataset::vision::CenterCrop(Crop);
assert(center_crop_op != nullptr); assert(center_crop_op != nullptr);
MS_LOG(WARNING) << "Push back crop"; MS_LOG(WARNING) << "Push back crop";
mapOperations.push_back(center_crop_op); mapOperations.push_back(center_crop_op);
// hasBatch = true; Batch not currently supported inMInddata-Lite
} }
} }
std::shared_ptr<Dataset> ds = nullptr;
MS_LOG(INFO) << "Read id=" << MDConf.fileid << " (-1) for all"; MS_LOG(INFO) << "Read id=" << MDConf.fileid << " (-1) for all";
std::shared_ptr<mindspore::dataset::AlbumOp> iter = nullptr;
const std::set<std::string> exts = {};
if (MDConf.fileid > -1) { if (MDConf.fileid > -1) {
// read specific image using SequentialSampler // read specific image using SequentialSampler witn
ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(MDConf.fileid, 1L)); iter = std::make_shared<mindspore::dataset::AlbumOp>(folder_path, true, schema_file, exts, MDConf.fileid);
} else { } else {
// Distributed sampler takes num_shards then shard_id iter = std::make_shared<mindspore::dataset::AlbumOp>(folder_path, true, schema_file, exts);
ds = Album(folder_path, schema_file, column_names, true, SequentialSampler());
} }
ds = ds->SetNumWorkers(1);
assert(ds != nullptr);
// Create a Repeat operation on ds
int32_t repeat_num = 1;
ds = ds->Repeat(repeat_num);
assert(ds != nullptr);
// Create objects for the tensor ops // Create objects for the tensor ops
MS_LOG(INFO) << " Create pipline parameters"; MS_LOG(INFO) << " Create pipline parameters";
@ -154,16 +151,7 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) {
} }
bool hasBatch = false; bool hasBatch = false;
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
if (nullptr == iter) {
MS_LOG(ERROR) << "Iterator creation failed";
return nullptr;
}
assert(iter != nullptr);
MDToDApi *pMDToDApi = new MDToDApi; MDToDApi *pMDToDApi = new MDToDApi;
pMDToDApi->_ds = ds;
pMDToDApi->_iter = iter; pMDToDApi->_iter = iter;
pMDToDApi->_augs = mapOperations; pMDToDApi->_augs = mapOperations;
pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath); pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath);
@ -173,11 +161,11 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) {
} }
template <typename T> template <typename T>
void MDBuffToVector(MDToDBuff_t MDBuff, std::vector<T> *vec) { void MDBuffToVector(const MDToDBuff_t MDBuff, std::vector<T> *vec) {
vec.clear(); vec->clear();
if (MDBuff.DataSize > 0) { if (MDBuff.DataSize > 0) {
int nofElements = MDBuff.DataSize / sizeof(T); int nofElements = MDBuff.DataSize / sizeof(T);
*vec.assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements); vec->assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements);
} }
} }
@ -217,7 +205,7 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro
resBuff->TensorSize[0] = 1; resBuff->TensorSize[0] = 1;
} }
if (column->shape()[firstDim] > 0) { if (column->shape()[firstDim] > 0) {
if (DataType::DE_STRING == column->type()) { if (mindspore::dataset::DataType::DE_STRING == column->type()) {
std::string str; std::string str;
for (int ix = 0; ix < column->shape()[firstDim]; ix++) { for (int ix = 0; ix < column->shape()[firstDim]; ix++) {
std::string_view strView; std::string_view strView;
@ -238,14 +226,14 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro
MS_LOG(ERROR) << "memcpy_s return: " << ret; MS_LOG(ERROR) << "memcpy_s return: " << ret;
} }
} else { } else {
DataHelper dh; mindspore::dataset::DataHelper dh;
resBuff->DataSize = resBuff->DataSize =
dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize); dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize);
} }
MS_LOG(INFO) << columnName << " " << resBuff->DataSize MS_LOG(INFO) << columnName << " " << resBuff->DataSize
<< " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") "; << " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") ";
if (0 == resBuff->DataSize) { if (0 == resBuff->DataSize) {
MS_LOG(ERROR) << "Copy Failed!!!! " << columnName << " Too large" MS_LOG(ERROR) << "COPY FAIL!!!! " << columnName << " Too large"
<< "."; // memcpy failed << "."; // memcpy failed
} }
} else { } else {
@ -259,7 +247,7 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro
extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
MS_LOG(INFO) << "Start GetNext"; MS_LOG(INFO) << "Start GetNext";
if (pMDToDApi == nullptr) { if (pMDToDApi == nullptr) {
MS_LOG(ERROR) << "GetNext called with nullptr. Abort"; MS_LOG(ERROR) << "GetNext called with null ptr. abort";
assert(pMDToDApi != nullptr); assert(pMDToDApi != nullptr);
} }
@ -271,12 +259,13 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
// get next row for dataset // get next row for dataset
std::unordered_map<std::string, std::shared_ptr<Tensor>> row; std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
if (pMDToDApi->_iter == nullptr) { if (pMDToDApi->_iter == nullptr) {
MS_LOG(ERROR) << "GetNext called with no iterator. abort"; MS_LOG(ERROR) << "GetNext called with no iteratoe. abort";
return -1; return -1;
} }
// create Execute functions, this replaces Map in Pipeline // create Execute functions, this replaces Map in Pipeline
pMDToDApi->_iter->GetNextRow(&row);
if (row.size() != 0) { bool ret = pMDToDApi->_iter->GetNextRow(&row);
if (row.size() != 0 && ret) {
if ((pMDToDApi->_augs).size() > 0) { if ((pMDToDApi->_augs).size() > 0) {
// String and Tensors // String and Tensors
GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff); GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff);
@ -285,7 +274,7 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
for (int i = 0; i < (pMDToDApi->_augs).size(); i++) { for (int i = 0; i < (pMDToDApi->_augs).size(); i++) {
// each Execute call will invoke a memcpy, this cannot really be optimized further // each Execute call will invoke a memcpy, this cannot really be optimized further
// for this use case, std move is added for fail save. // for this use case, std move is added for fail save.
row["image"] = Execute((pMDToDApi->_augs)[i])(std::move(row["image"])); row["image"] = mindspore::dataset::Execute((pMDToDApi->_augs)[i])(std::move(row["image"]));
if (row["image"] == nullptr) { if (row["image"] == nullptr) {
// nullptr means that the eager mode image processing failed, we fail in this case // nullptr means that the eager mode image processing failed, we fail in this case
return -1; return -1;
@ -316,20 +305,18 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) { extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) {
// Manually terminate the pipeline // Manually terminate the pipeline
pMDToDApi->_iter->Stop();
MS_LOG(WARNING) << "pipline stoped"; MS_LOG(WARNING) << "pipline stoped";
return 0; return 0;
} }
extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) { extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) {
MS_LOG(WARNING) << "pipeline deleted start"; MS_LOG(WARNING) << "pipline deleted start";
pMDToDApi->_iter->Stop();
delete pMDToDApi; delete pMDToDApi;
MS_LOG(WARNING) << "pipeline deleted end"; MS_LOG(WARNING) << "pipline deleted end";
return 0; return 0;
} }
int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) { int GetJsonFullFileName(const MDToDApi *pMDToDApi, std::string *filePath) {
int64_t file_id = pMDToDApi->_file_id; int64_t file_id = pMDToDApi->_file_id;
if (file_id < 0) { if (file_id < 0) {
MS_LOG(ERROR) << "Illigal file ID to update: " << file_id << "."; MS_LOG(ERROR) << "Illigal file ID to update: " << file_id << ".";
@ -343,12 +330,12 @@ int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) {
extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings, extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings,
size_t emmbeddingsSize) { size_t emmbeddingsSize) {
auto columnName = std::string(column); auto columnName = std::string(column);
MS_LOG(INFO) << "Start update " << columnName; MS_LOG(INFO) << "Start Update " << columnName;
std::string converted = std::to_string(pMDToDApi->_file_id); std::string converted = std::to_string(pMDToDApi->_file_id);
std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin"; std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin";
DataHelper dh; mindspore::dataset::DataHelper dh;
MS_LOG(INFO) << "Try to save file " << embedding_file_path; MS_LOG(INFO) << "Try to Save file " << embedding_file_path;
std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize); std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize);
Status rc = dh.template WriteBinFile<float>(embedding_file_path, bin_content); Status rc = dh.template WriteBinFile<float>(embedding_file_path, bin_content);
if (rc.IsError()) { if (rc.IsError()) {
@ -379,8 +366,8 @@ extern "C" int MDToDApi_UpdateStringArray(MDToDApi *pMDToDApi, const char *colum
MS_LOG(ERROR) << "Failed to update " << columnName; MS_LOG(ERROR) << "Failed to update " << columnName;
return -1; return -1;
} }
MS_LOG(INFO) << "Start Update string array column: " << columnName << " in file " << file_path; MS_LOG(INFO) << "Start Update string Array column: " << columnName << " in file " << file_path;
DataHelper dh; mindspore::dataset::DataHelper dh;
std::vector<std::string> strVec; std::vector<std::string> strVec;
if (MDbuff.DataSize > 0) { if (MDbuff.DataSize > 0) {
const char *p = reinterpret_cast<char *>(MDbuff.Buff); const char *p = reinterpret_cast<char *>(MDbuff.Buff);
@ -405,7 +392,7 @@ extern "C" int MDToDApi_UpdateFloatArray(MDToDApi *pMDToDApi, const char *column
return -1; return -1;
} }
MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path; MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path;
DataHelper dh; mindspore::dataset::DataHelper dh;
std::vector<float> vec; std::vector<float> vec;
MDBuffToVector<float>(MDBuff, &vec); MDBuffToVector<float>(MDBuff, &vec);
Status rc = dh.UpdateArray<float>(file_path, columnName, vec); Status rc = dh.UpdateArray<float>(file_path, columnName, vec);
@ -423,7 +410,7 @@ extern "C" int MDToDApi_UpdateIsForTrain(MDToDApi *pMDToDApi, int32_t isForTrain
if (file_id < 0) return -1; if (file_id < 0) return -1;
std::string converted = std::to_string(pMDToDApi->_file_id); std::string converted = std::to_string(pMDToDApi->_file_id);
std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json";
DataHelper dh; mindspore::dataset::DataHelper dh;
MS_LOG(INFO) << "Updating file: " << file_path; MS_LOG(INFO) << "Updating file: " << file_path;
Status rc = dh.UpdateValue<int32_t>(file_path, "_isForTrain", isForTrain, ""); Status rc = dh.UpdateValue<int32_t>(file_path, "_isForTrain", isForTrain, "");
if (rc.IsError()) { if (rc.IsError()) {
@ -440,7 +427,7 @@ extern "C" int MDToDApi_UpdateNoOfFaces(MDToDApi *pMDToDApi, int32_t noOfFaces)
if (file_id < 0) return -1; if (file_id < 0) return -1;
std::string converted = std::to_string(pMDToDApi->_file_id); std::string converted = std::to_string(pMDToDApi->_file_id);
std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json"; std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json";
DataHelper dh; mindspore::dataset::DataHelper dh;
MS_LOG(INFO) << "Updating file: " << file_path; MS_LOG(INFO) << "Updating file: " << file_path;
Status rc = dh.UpdateValue<int32_t>(file_path, "_noOfFaces", noOfFaces, ""); Status rc = dh.UpdateValue<int32_t>(file_path, "_noOfFaces", noOfFaces, "");
if (rc.IsError()) { if (rc.IsError()) {

View File

@ -0,0 +1,470 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "album_op_android.h" //NOLINT
#include <fstream>
#include <iomanip>
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/kernels/image/lite_image_utils.h"
namespace mindspore {
namespace dataset {
// Constructs an AlbumOp that serves every entry found in the album folder.
//
// file_dir    - folder that holds the album files.
// do_decode   - when true, JPEG images are decoded while rows are loaded.
// schema_file - path of the schema file describing the columns.
// exts        - accepted file extensions; an empty set accepts every file.
//
// The folder is scanned immediately. A constructor cannot return a Status, so a
// failed scan is reported through the log instead of being dropped silently.
AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
                 const std::set<std::string> &exts)
    : folder_path_(file_dir),
      decode_(do_decode),
      extensions_(exts),
      schema_file_(schema_file),
      row_cnt_(0),
      buf_cnt_(0),
      current_cnt_(0),
      dirname_offset_(0),
      sampler_(false),
      sampler_index_(0) {
  Status rc = PrescanEntry();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "AlbumOp: PrescanEntry failed: " << rc.ToString();
  }
}
// Constructs an AlbumOp that serves exactly one entry of the album folder.
//
// file_dir    - folder that holds the album files.
// do_decode   - when true, JPEG images are decoded while rows are loaded.
// schema_file - path of the schema file describing the columns.
// exts        - accepted file extensions; an empty set accepts every file.
// index       - position, in the numerically sorted file list, of the single
//               entry to keep.
//
// The folder is scanned immediately. A constructor cannot return a Status, so a
// failed scan (including an out-of-range index) is reported through the log.
AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
                 const std::set<std::string> &exts, uint32_t index)
    : folder_path_(file_dir),
      decode_(do_decode),
      extensions_(exts),
      schema_file_(schema_file),
      row_cnt_(0),
      buf_cnt_(0),
      current_cnt_(0),
      dirname_offset_(0),
      sampler_(true),
      // Honour the caller's index; it was previously hard-coded to 0, so the
      // first file was always selected regardless of the requested id.
      sampler_index_(index) {
  Status rc = PrescanEntry();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "AlbumOp: PrescanEntry failed: " << rc.ToString();
  }
}
// Ordering predicate used to sort album file names numerically.
//
// Each name is expected to look like "<sep><digits>.<ext>" (for example
// "/12.json"): the first character is skipped and the decimal number that
// follows is parsed. Parsing stops at the first non-digit, so the "." that
// starts the extension terminates the number. Names without a leading number
// (or empty names) compare as 0.
//
// Returns true when the numeric value in `a` is strictly less than the one in `b`.
bool StrComp(const std::string &a, const std::string &b) {
  // strtoll keeps the full 64-bit range and has defined behaviour on overflow,
  // unlike atoi, which only yields an int.
  const auto numeric_id = [](const std::string &name) -> int64_t {
    if (name.empty()) {
      return 0;
    }
    return static_cast<int64_t>(std::strtoll(name.c_str() + 1, nullptr, 10));
  };
  return numeric_id(a) < numeric_id(b);
}
// Scans the album folder once and prepares the list of rows to serve.
//
// Steps:
//   1. Load the schema file and build the column-name -> column-index map.
//   2. Walk the folder, keeping every file whose extension is accepted
//      (all files when no extension filter is set).
//   3. Sort the kept names numerically with StrComp.
//   4. When a sampler index was requested, reduce the list to that one entry.
//
// Returns Status::OK() on success, or an error Status when the schema file or
// the folder is invalid, the schema cannot be loaded, no file matches, or the
// sampler index is out of range.
Status AlbumOp::PrescanEntry() {
  data_schema_ = std::make_unique<DataSchema>();
  Path schema_file(schema_file_);
  if (schema_file_ == "" || !schema_file.Exists()) {
    RETURN_STATUS_UNEXPECTED("Invalid file, schema_file is invalid or not set: " + schema_file_);
  }
  MS_LOG(WARNING) << "Schema file provided: " << schema_file_ << ".";
  // Propagate schema loading failures instead of continuing with an empty schema.
  RETURN_IF_NOT_OK(data_schema_->LoadSchemaFile(schema_file_, columns_to_load_));

  for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
    column_name_id_map_[data_schema_->column(i).name()] = i;
  }

  Path folder(folder_path_);
  // File names are stored relative to the folder, so remember how much to strip.
  dirname_offset_ = folder_path_.length();
  std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
  if (folder.Exists() == false || dirItr == nullptr) {
    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_);
  }
  MS_LOG(WARNING) << "Album folder Path found: " << folder_path_ << ".";

  while (dirItr->hasNext()) {
    Path file = dirItr->next();
    if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
      (void)image_rows_.push_back(file.toString().substr(dirname_offset_));
    } else {
      MS_LOG(WARNING) << "Album operator unsupported file found: " << file.toString()
                      << ", extension: " << file.Extension() << ".";
    }
  }

  std::sort(image_rows_.begin(), image_rows_.end(), StrComp);

  if (image_rows_.size() == 0) {
    RETURN_STATUS_UNEXPECTED(
      "Invalid data, no valid data matching the dataset API AlbumDataset. Please check file path or dataset API.");
  }

  if (sampler_) {
    if (sampler_index_ < 0 || sampler_index_ >= image_rows_.size()) {
      RETURN_STATUS_UNEXPECTED("the sampler index was out of range");
    }
    // Keep only the requested entry.
    std::vector<std::string> tmp;
    tmp.emplace_back(image_rows_[sampler_index_]);
    image_rows_.clear();
    image_rows_ = tmp;
  }

  return Status::OK();
}
// contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_
// IMPORTANT: 1 IOBlock produces 1 DataBuffer
bool AlbumOp::GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
if (map_row == nullptr) {
MS_LOG(WARNING) << "GetNextRow in AlbumOp: the point of map_row is nullptr";
return false;
}
if (current_cnt_ == image_rows_.size()) {
return false;
}
Status ret = LoadTensorRow(current_cnt_, image_rows_[current_cnt_], map_row);
if (ret.IsError()) {
MS_LOG(ERROR) << "GetNextRow in AlbumOp: " << ret.ToString() << "\n";
return false;
}
current_cnt_++;
return true;
}
// Inspects the first three bytes of a file to see whether it is a JPEG.
//
// file_name - path of the file to inspect.
// valid     - output flag; set to true only when the bytes are FF D8 FF.
//
// Returns false when the file cannot be opened or the three bytes cannot be
// read, true otherwise. No Status is returned because callers want to skip bad
// input rather than fail on it.
bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
  constexpr int kSignatureLen = 3;
  *valid = false;

  std::ifstream input(file_name, std::ios::binary | std::ios::in);
  if (!input.is_open()) {
    return false;
  }

  unsigned char signature[kSignatureLen];
  (void)input.read(reinterpret_cast<char *>(signature), kSignatureLen);
  const bool read_failed = input.fail();
  input.close();
  if (read_failed) {
    return false;
  }

  // Plain JPEGs start with FF D8 FF E0 and EXIF JPEGs with FF D8 FF E1;
  // matching on the shared FF D8 FF prefix accepts both.
  const bool is_jpeg = signature[0] == 0xff && signature[1] == 0xd8 && signature[2] == 0xff;
  if (is_jpeg) {
    *valid = true;
  }
  return true;
}
// Loads the image column of one album entry into *tensor.
//
// image_file_path - path of the file referenced by the entry.
// col_num         - schema column index, used when an empty tensor is produced.
// tensor          - output tensor.
//
// Behaviour by input:
//   - file cannot be opened       -> empty tensor
//   - .png / .PNG                 -> empty tensor (PNG is not handled here)
//   - .bin / .BIN                 -> raw file content
//   - not a JPEG signature        -> empty tensor
//   - JPEG, decode_ == true       -> decoded image, or empty tensor if decoding fails
//   - JPEG, decode_ == false      -> raw file content
//
// Returns an error Status only when creating one of those tensors fails.
Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorPtr *tensor) {
  TensorPtr image;
  std::ifstream fs;
  fs.open(image_file_path, std::ios::binary | std::ios::in);
  if (fs.fail()) {
    MS_LOG(WARNING) << "File not found:" << image_file_path << ".";
    // If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }

  // Hack logic to replace png images with empty tensor
  Path file(image_file_path);
  std::set<std::string> png_ext = {".png", ".PNG"};
  if (png_ext.find(file.Extension()) != png_ext.end()) {
    // load empty tensor since image is not jpg
    MS_LOG(INFO) << "PNG!" << image_file_path << ".";
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }

  // treat bin files separately: their raw content is returned as-is
  std::set<std::string> bin_ext = {".bin", ".BIN"};
  if (bin_ext.find(file.Extension()) != bin_ext.end()) {
    MS_LOG(INFO) << "Bin file found" << image_file_path << ".";
    RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, tensor));
    return Status::OK();
  }

  // check that the file is an image before decoding
  bool valid = false;
  bool check_success = CheckImageType(image_file_path, &valid);
  if (!check_success || !valid) {
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }

  // it is a jpeg image: load the raw bytes first
  RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
  if (!decode_) {
    // Decoding was not requested: hand back the raw bytes. Previously the
    // output tensor was left unassigned on this path.
    *tensor = image;
    return Status::OK();
  }

  Status rc = Decode(image, tensor);
  if (rc.IsError()) {
    // A corrupt image is not fatal; fall back to an empty tensor.
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }
  return Status::OK();
}
// Converts a json array of strings into a tensor.
// \param[in] json_obj json value holding the strings
// \param[in] col_num column number in the schema (not used by this loader)
// \param[out] tensor receives the tensor built from the strings
// \return Status the error code returned
Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  auto labels = json_obj.get<std::vector<std::string>>();
  MS_LOG(WARNING) << "String array label found: " << labels << ".";
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(labels, tensor));
  return Status::OK();
}
// Converts a single json string into a scalar string tensor.
// \param[in] json_obj json value holding the string
// \param[in] col_num column number in the schema (not used by this loader)
// \param[out] tensor receives the scalar string tensor
// \return Status the error code returned
Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  // Implicit json -> std::string conversion; the json library raises an exception if the value is not a string.
  const std::string data = json_obj;
  MS_LOG(INFO) << "String label found: " << data << ".";
  RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(data, tensor));
  return Status::OK();
}
// Converts a json list of integers into a tensor whose element type follows the schema column type.
// Consider templating this function to handle all integer widths.
// \param[in] json_obj json value holding the integers
// \param[in] col_num column number in the schema, selects int32 or int64
// \param[out] tensor receives the tensor built from the values
// \return Status error if the column type is neither int32 nor int64
Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  const auto column_type = data_schema_->column(col_num).type();
  if (column_type == DataType::DE_INT64) {
    std::vector<int64_t> values;
    // Walk the json elements in order and convert each one to the target integer type
    for (const nlohmann::json &element : json_obj) {
      values.push_back(element.get<int64_t>());
    }
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(values, tensor));
  } else if (column_type == DataType::DE_INT32) {
    std::vector<int32_t> values;
    for (const nlohmann::json &element : json_obj) {
      values.push_back(element.get<int32_t>());
    }
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(values, tensor));
  } else {
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " +
                             column_type.ToString());
  }
  return Status::OK();
}
// Converts a json list of floating point numbers into a tensor whose element type follows the schema column type.
// Consider templating this function to handle both float widths.
// \param[in] json_obj json value holding the numbers
// \param[in] col_num column number in the schema, selects float32 or float64
// \param[out] tensor receives the tensor built from the values
// \return Status error if the column type is neither float32 nor float64
Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  const auto column_type = data_schema_->column(col_num).type();
  if (column_type == DataType::DE_FLOAT64) {
    std::vector<double> values;
    // Walk the json elements in order and convert each one to the target floating point type
    for (const nlohmann::json &element : json_obj) {
      values.push_back(element.get<double>());
    }
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(values, tensor));
  } else if (column_type == DataType::DE_FLOAT32) {
    std::vector<float> values;
    for (const nlohmann::json &element : json_obj) {
      values.push_back(element.get<float>());
    }
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(values, tensor));
  } else {
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " +
                             column_type.ToString());
  }
  return Status::OK();
}
// Builds the "id" tensor from the name of the json file.
// For a string column the file name is stored as is; otherwise a numeric id is parsed from the file name
// and stored as an int64 scalar.
// \param[in] file file name to derive the id from
// \param[in] col_num column number in the schema, selects string or numeric id
// \param[out] tensor receives the scalar id tensor
// \return Status the error code returned
Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor) {
  if (data_schema_->column(col_num).type() == DataType::DE_STRING) {
    RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, tensor));
    return Status::OK();
  }
  // Hack to get the file name without extension: index 0 is skipped (presumably a leading path separator)
  // and parsing stops at the first non-digit, so the '.' that may end the substring is ignored.
  int64_t image_id = 0;
  // Guard: substr(1, ...) on an empty name would throw std::out_of_range; such names map to id 0.
  if (file.size() > 1) {
    const std::string id_text = file.substr(1, file.find("."));
    // strtoll keeps the full 64-bit range of the target type (atoi only yields an int).
    image_id = static_cast<int64_t>(std::strtoll(id_text.c_str(), nullptr, 10));
  }
  RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(image_id, tensor));
  MS_LOG(INFO) << "File ID " << image_id << ".";
  return Status::OK();
}
// Creates a placeholder tensor of shape {0} typed after the schema column.
// \param[in] col_num column number in the schema, provides the element type
// \param[out] tensor receives the empty tensor
// \return Status the error code returned
Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) {
  const auto column_type = data_schema_->column(col_num).type();
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), column_type, tensor));
  return Status::OK();
}
// Loads a scalar tensor with a float value.
// There is no reverse look up from the json value to the schema type, so the schema column type is checked
// explicitly to decide whether the tensor is filled with a float32 or a float64.
// Float64 doesn't work with reinterpret cast here, and limiting the schema to float32 only would be a
// weird limitation to impose.
// \param[in] json_obj json value holding the number
// \param[in] col_num column number in the schema, selects float32 or float64
// \param[out] tensor receives the scalar tensor
// \return Status error if the column type is neither float32 nor float64
Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  const auto column_type = data_schema_->column(col_num).type();
  if (column_type == DataType::DE_FLOAT64) {
    double data = json_obj;
    MS_LOG(INFO) << "double found: " << json_obj << ".";
    RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, tensor));
  } else if (column_type == DataType::DE_FLOAT32) {
    float data = json_obj;
    RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data, tensor));
    MS_LOG(INFO) << "float found: " << json_obj << ".";
  } else {
    // Report unsupported types instead of returning OK with *tensor left unset,
    // matching what LoadFloatArrayTensor does.
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " +
                             column_type.ToString());
  }
  return Status::OK();
}
// Loads a scalar tensor with an int value, converted to the integer type specified in the schema.
// \param[in] json_obj json value holding the integer
// \param[in] col_num column number in the schema, selects int32 or int64
// \param[out] tensor receives the scalar tensor
// \return Status error if the column type is neither int32 nor int64
Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  const auto column_type = data_schema_->column(col_num).type();
  if (column_type == DataType::DE_INT64) {
    int64_t data = json_obj;
    MS_LOG(INFO) << "int64 found: " << json_obj << ".";
    RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, tensor));
  } else if (column_type == DataType::DE_INT32) {
    int32_t data = json_obj;
    RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, tensor));
    MS_LOG(INFO) << "int32 found: " << json_obj << ".";
  } else {
    // Report unsupported types instead of returning OK with *tensor left unset,
    // matching what LoadIntArrayTensor does.
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " +
                             column_type.ToString());
  }
  return Status::OK();
}
// Reads one json file and fills map_row with one tensor per schema column, keyed by column name.
// Every line of the file is parsed as a json document; if the file has several lines, later lines overwrite
// the entries written by earlier ones.
// Per column, the first matching rule wins:
//   1. column named "id"                    -> id derived from the file name
//   2. key missing in the json              -> empty tensor
//   3. json string, string column           -> string scalar
//   4. json array, string column            -> string array
//   5. json string, non-string column       -> treated as an image file path
//   6. non-array / array, float column      -> float scalar / float array
//   7. non-array / array, int column        -> int scalar / int array
//   8. anything else                        -> warning, no entry written for that column
// Readability is favoured over the minor performance gain of removing duplicated checks in the helpers.
// \param[in] row_id id for this row (not used by this implementation)
// \param[in] file json file name, appended to the folder path
// \param[out] map_row receives the loaded tensors
// \return Status error if the file cannot be opened or parsed, or if a loader fails
Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
                              std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
  MS_LOG(INFO) << "Image row file: " << file << ".";
  const std::string json_path = folder_path_ + file;
  std::ifstream file_handle(json_path);
  if (!file_handle.is_open()) {
    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + json_path);
  }
  std::string line;
  while (std::getline(file_handle, line)) {
    try {
      nlohmann::json js = nlohmann::json::parse(line);
      MS_LOG(INFO) << "This Line: " << line << ".";
      // every column descriptor of the schema is looked up in the parsed json
      const int32_t columns = data_schema_->NumColumns();
      for (int32_t i = 0; i < columns; i++) {
        const auto col_name = data_schema_->column(i).name();
        const auto col_type = data_schema_->column(i).type();
        TensorPtr tensor;
        if (col_name == "id") {
          // id is internal, special case to load from the file name
          RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor));
        } else if (js.find(col_name) == js.end()) {
          // key not present in the json, store an empty tensor as placeholder
          MS_LOG(INFO) << "Pushing empty tensor for column: " << col_name << ".";
          RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor));
        } else {
          nlohmann::json column_value = js.at(col_name);
          MS_LOG(INFO) << "This column is: " << col_name << ".";
          const bool is_array = column_value.is_array();
          const bool is_text = column_value.is_string();
          const bool string_column = (col_type == DataType::DE_STRING);
          const bool float_column = (col_type == DataType::DE_FLOAT32 || col_type == DataType::DE_FLOAT64);
          const bool int_column = (col_type == DataType::DE_INT64 || col_type == DataType::DE_INT32);
          if (is_text && string_column) {
            // single string
            RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor));
          } else if (is_array && string_column) {
            // string array
            RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor));
          } else if (is_text && !string_column) {
            // a string in a non-string column is an image file path
            std::string image_file_path = column_value;
            RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor));
          } else if (!is_array && float_column) {
            // float value
            RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor));
          } else if (is_array && float_column) {
            // float array
            RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor));
          } else if (!is_array && int_column) {
            // int value
            RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor));
          } else if (is_array && int_column) {
            // int array
            RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor));
          } else {
            MS_LOG(WARNING) << "Value type for column: " << col_name << " is not supported.";
            // nothing is stored for an unsupported column
            continue;
          }
        }
        (*map_row)[col_name] = tensor;
      }
    } catch (const std::exception &err) {
      file_handle.close();
      RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + json_path);
    }
  }
  file_handle.close();
  return Status::OK();
}
} // namespace dataset
} // namespace mindspore

View File

@ -0,0 +1,173 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_
#include <deque>
#include <memory>
#include <queue>
#include <string>
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <vector>
#include <unordered_map>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/queue.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
// Forward declares
// Queue is only declared here, which lets it be named without its full definition.
// NOTE(review): "minddata/dataset/util/queue.h" is already included above and presumably defines it - confirm
// whether this forward declaration is still needed.
template <typename T>
class Queue;
// Define row information as a list of file objects to read.
// The alias is a shared pointer to a pair of (string, queue of strings).
// NOTE(review): presumably (folder name, image file names); it is not used in the visible part of this header.
using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>;
/// \class AlbumOp
/// \brief Reads an album described by json files and a schema, and produces rows as maps from column name
///     to tensor.
class AlbumOp {
public:
/// \brief Constructor
/// \param[in] file_dir - directory of Album
/// \param[in] do_decode - decode image files
/// \param[in] schema_file - schema file
/// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
const std::set<std::string> &exts);
/// \brief Constructor
/// \param[in] file_dir - directory of Album
/// \param[in] do_decode - decode image files
/// \param[in] schema_file - schema file
/// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
/// \param[in] index - the specific file index
AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
const std::set<std::string> &exts, uint32_t index);
/// \brief Destructor.
~AlbumOp() = default;
/// \brief Initialize AlbumOp related var, calls the function to walk all files
/// \return - The error code returned
Status PrescanEntry();
/// \brief Fetch the next row
/// \param[out] map_row - receives the row
///     NOTE(review): presumably filled through LoadTensorRow, i.e. keyed by schema column name - confirm
/// \return bool - true when a row was produced (the current row counter is then advanced), false otherwise
bool GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);
/// \brief Check if a file is a valid image. Only the JPEG signature is recognised by the implementation.
/// This function could be optimized to return the tensor to reduce open/closing files
/// \param[in] file_name - path of the file to inspect
/// \param[out] valid - set to true only if the file starts with the JPEG signature
/// \return bool - false if the file cannot be opened or read, true otherwise
bool CheckImageType(const std::string &file_name, bool *valid);
/// \brief Op name getter
/// \return Name of the current Op
std::string Name() const { return "AlbumOp"; }
private:
/// \brief Load image to tensor
/// \param[in] image_file Image name of file
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorPtr *tensor);
/// \brief Load vector of ints to tensor
/// \param[in] json_obj Json object containing multi-dimensional label
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
/// \brief Load vector of floats to tensor
/// \param[in] json_obj Json object containing array data
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
/// \brief Load string array into a tensor
/// \param[in] json_obj Json object containing string tensor
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
/// \brief Load string into a tensor
/// \param[in] json_obj Json object containing string tensor
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
/// \brief Load float value to tensor
/// \param[in] json_obj Json object containing float
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
/// \brief Load int value to tensor
/// \param[in] json_obj Json object containing int
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
/// \brief Load empty tensor, typed after the schema column
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor);
/// \brief Load id from file name to tensor
/// \param[in] file The file name to get ID from
/// \param[in] col_num Column num in schema
/// \param[out] tensor Tensor that receives the result
/// \return Status The error code returned
Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor);
/// \brief Load the columns of one json file into a map of tensors
/// \param[in] row_id Id for this tensor row
/// \param[in] file Json file name, appended to the folder path by the implementation
/// \param[out] map_row Json content stored as tensors, keyed by schema column name
/// \return Status The error code returned
Status LoadTensorRow(row_id_type row_id, const std::string &file,
std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);
std::string folder_path_; // directory of image folder
bool decode_; // when true, loaded image files are decoded
std::vector<std::string> columns_to_load_;
std::set<std::string> extensions_; // extensions allowed
std::unique_ptr<DataSchema> data_schema_; // schema providing the name and type of each column
std::string schema_file_;
int64_t row_cnt_;
int64_t current_cnt_; // advanced by GetNextRow each time a row is returned
int64_t buf_cnt_;
int64_t dirname_offset_;
bool sampler_;
int64_t sampler_index_;
std::vector<std::string> image_rows_;
std::unordered_map<std::string, int32_t> column_name_id_map_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_