forked from mindspore-Ecosystem/mindspore
add wrapper code for jni
This commit is contained in:
parent
40222f59a7
commit
406c586374
6
build.sh
6
build.sh
|
@ -49,7 +49,7 @@ usage()
|
||||||
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
|
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
|
||||||
echo " -D Enable dumping of function graph ir, default on"
|
echo " -D Enable dumping of function graph ir, default on"
|
||||||
echo " -z Compile dataset & mindrecord, default on"
|
echo " -z Compile dataset & mindrecord, default on"
|
||||||
echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv mode in lite predict"
|
echo " -n Compile minddata with mindspore lite, available: off, lite, full, lite_cv, full mode in lite train and lite_cv, wrapper mode in lite predict"
|
||||||
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
|
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
|
||||||
echo " -V Specify the minimum required cuda version, default CUDA 10.1"
|
echo " -V Specify the minimum required cuda version, default CUDA 10.1"
|
||||||
echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation"
|
echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation"
|
||||||
|
@ -129,7 +129,7 @@ checkopts()
|
||||||
DEBUG_MODE="on"
|
DEBUG_MODE="on"
|
||||||
;;
|
;;
|
||||||
n)
|
n)
|
||||||
if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" ]]; then
|
if [[ "X$OPTARG" == "Xoff" || "X$OPTARG" == "Xlite" || "X$OPTARG" == "Xfull" || "X$OPTARG" == "Xlite_cv" || "X$OPTARG" == "Xwrapper" ]]; then
|
||||||
COMPILE_MINDDATA_LITE="$OPTARG"
|
COMPILE_MINDDATA_LITE="$OPTARG"
|
||||||
else
|
else
|
||||||
echo "Invalid value ${OPTARG} for option -n"
|
echo "Invalid value ${OPTARG} for option -n"
|
||||||
|
@ -678,7 +678,7 @@ build_lite()
|
||||||
build_gtest
|
build_gtest
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "${COMPILE_MINDDATA_LITE}" == "lite" ] || [ "${COMPILE_MINDDATA_LITE}" == "full" ]; then
|
if [[ "${COMPILE_MINDDATA_LITE}" == "lite" || "${COMPILE_MINDDATA_LITE}" == "full" || "${COMPILE_MINDDATA_LITE}" == "wrapper" ]]; then
|
||||||
build_minddata_lite_deps
|
build_minddata_lite_deps
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ set(OPENCV_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/minddata/third_part
|
||||||
set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf)
|
set(PROTOBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/protobuf)
|
||||||
set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers)
|
set(FLATBF_DIR_RUN_X86 ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/third_party/flatbuffers)
|
||||||
|
|
||||||
if (BUILD_MINDDATA STREQUAL "full")
|
if (BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
|
||||||
install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||||
if (PLATFORM_ARM64)
|
if (PLATFORM_ARM64)
|
||||||
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||||
|
|
|
@ -28,7 +28,7 @@ set(MAIN_DIR ${DIR_PREFIX}-${MS_VERSION})
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMS_VERSION_MAJOR=${MS_VERSION_MAJOR} -DMS_VERSION_MINOR=${MS_VERSION_MINOR} -DMS_VERSION_REVISION=${MS_VERSION_REVISION}")
|
||||||
set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv or full")
|
set(BUILD_MINDDATA "lite_cv" CACHE STRING "off, lite, lite_cv, wrapper or full")
|
||||||
set(BUILD_LITE "on")
|
set(BUILD_LITE "on")
|
||||||
set(PLATFORM_ARM "off")
|
set(PLATFORM_ARM "off")
|
||||||
if (PLATFORM_ARM64 OR PLATFORM_ARM32)
|
if (PLATFORM_ARM64 OR PLATFORM_ARM32)
|
||||||
|
@ -182,7 +182,7 @@ if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full")
|
if (BUILD_MINDDATA STREQUAL "lite" OR BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
|
||||||
# add sentencepiece dependency
|
# add sentencepiece dependency
|
||||||
# include(${TOP_DIR}/cmake/external_libs/sentencepiece.cmake)
|
# include(${TOP_DIR}/cmake/external_libs/sentencepiece.cmake)
|
||||||
# json
|
# json
|
||||||
|
|
|
@ -81,6 +81,12 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/util MINDDATA_UTIL_SRC_FILES)
|
||||||
AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES)
|
AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES)
|
||||||
|
|
||||||
|
|
||||||
|
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||||
|
if (BUILD_MINDDATA STREQUAL "full")
|
||||||
|
set(BUILD_MINDDATA "wrapper")
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (BUILD_MINDDATA STREQUAL "full")
|
if (BUILD_MINDDATA STREQUAL "full")
|
||||||
include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image")
|
include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image")
|
||||||
list(REMOVE_ITEM MINDDATA_API_SRC_FILES
|
list(REMOVE_ITEM MINDDATA_API_SRC_FILES
|
||||||
|
@ -293,11 +299,73 @@ if (BUILD_MINDDATA STREQUAL "full")
|
||||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||||
target_link_libraries(minddata-lite log)
|
target_link_libraries(minddata-lite log)
|
||||||
elseif (BUILD_MINDDATA_EXAMPLE)
|
elseif (BUILD_MINDDATA_EXAMPLE)
|
||||||
# add_executable(mdlite-example ${CMAKE_CURRENT_SOURCE_DIR}/example/x86-example.cc)
|
endif()
|
||||||
# target_link_libraries(mdlite-example minddata-lite)
|
elseif (BUILD_MINDDATA STREQUAL "wrapper")
|
||||||
# add_custom_command(TARGET mdlite-example POST_BUILD
|
include_directories("${MINDDATA_DIR}/kernels/image")
|
||||||
# COMMAND cp -rf ${CMAKE_CURRENT_SOURCE_DIR}/example/testCifar10Data ${CMAKE_BINARY_DIR}/minddata
|
include_directories("${MINDDATA_DIR}/util")
|
||||||
# )
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/wrapper)
|
||||||
|
set(MINDDATA_TODAPI_SRC
|
||||||
|
${MINDDATA_DIR}/core/tensor_shape.cc
|
||||||
|
${MINDDATA_DIR}/core/tensor.cc
|
||||||
|
${MINDDATA_DIR}/core/config_manager.cc
|
||||||
|
${MINDDATA_DIR}/core/data_type.cc
|
||||||
|
${MINDDATA_DIR}/core/tensor_helpers.cc
|
||||||
|
${MINDDATA_DIR}/core/global_context.cc
|
||||||
|
${MINDDATA_DIR}/core/tensor_row.cc
|
||||||
|
${MINDDATA_DIR}/api/vision.cc
|
||||||
|
${MINDDATA_DIR}/api/execute.cc
|
||||||
|
${MINDDATA_DIR}/api/transforms.cc
|
||||||
|
${MINDDATA_DIR}/api/de_tensor.cc
|
||||||
|
${MINDDATA_DIR}/util/path.cc
|
||||||
|
${MINDDATA_DIR}/util/status.cc
|
||||||
|
${MINDDATA_DIR}/util/data_helper.cc
|
||||||
|
${MINDDATA_DIR}/util/memory_pool.cc
|
||||||
|
${MINDDATA_DIR}/engine/data_schema.cc
|
||||||
|
${MINDDATA_DIR}/kernels/tensor_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/image/lite_image_utils.cc
|
||||||
|
${MINDDATA_DIR}/kernels/image/center_crop_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/image/crop_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/image/normalize_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/image/resize_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/data/compose_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/data/duplicate_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/data/one_hot_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/data/random_apply_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/data/random_choice_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/data/type_cast_op.cc
|
||||||
|
${MINDDATA_DIR}/kernels/data/data_utils.cc
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/wrapper/album_op_android.cc
|
||||||
|
)
|
||||||
|
|
||||||
|
add_library(minddata-lite SHARED
|
||||||
|
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
|
||||||
|
${CORE_DIR}/utils/ms_utils.cc
|
||||||
|
${MINDDATA_TODAPI_SRC}
|
||||||
|
)
|
||||||
|
|
||||||
|
find_package(Threads REQUIRED)
|
||||||
|
target_link_libraries(minddata-lite
|
||||||
|
securec
|
||||||
|
jpeg-turbo
|
||||||
|
jpeg
|
||||||
|
mindspore::json
|
||||||
|
Threads::Threads
|
||||||
|
)
|
||||||
|
|
||||||
|
# ref: https://github.com/android/ndk/issues/1202
|
||||||
|
if (PLATFORM_ARM32)
|
||||||
|
file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a)
|
||||||
|
if (LIBCLANG_RT_LIB STREQUAL "")
|
||||||
|
MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-androi2d.a in $ENV{ANDROID_NDK}")
|
||||||
|
endif()
|
||||||
|
target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||||
|
target_link_libraries(minddata-lite log)
|
||||||
|
elseif (BUILD_MINDDATA_EXAMPLE)
|
||||||
endif()
|
endif()
|
||||||
elseif (BUILD_MINDDATA STREQUAL "lite")
|
elseif (BUILD_MINDDATA STREQUAL "lite")
|
||||||
list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES "${MINDDATA_DIR}/core/client.cc")
|
list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES "${MINDDATA_DIR}/core/client.cc")
|
||||||
|
@ -374,9 +442,6 @@ elseif (BUILD_MINDDATA STREQUAL "lite")
|
||||||
securec
|
securec
|
||||||
jpeg-turbo
|
jpeg-turbo
|
||||||
jpeg
|
jpeg
|
||||||
# opencv_core
|
|
||||||
# opencv_imgcodecs
|
|
||||||
# opencv_imgproc
|
|
||||||
mindspore::json
|
mindspore::json
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
#include "MDToDApi.h"
|
#include "MDToDApi.h" //NOLINT
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
@ -22,7 +22,8 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "minddata/dataset/include/datasets.h"
|
|
||||||
|
#include "album_op_android.h" //NOLINT
|
||||||
#include "minddata/dataset/include/execute.h"
|
#include "minddata/dataset/include/execute.h"
|
||||||
#include "minddata/dataset/util/path.h"
|
#include "minddata/dataset/util/path.h"
|
||||||
#include "minddata/dataset/include/vision.h"
|
#include "minddata/dataset/include/vision.h"
|
||||||
|
@ -35,7 +36,7 @@
|
||||||
using mindspore::dataset::Path;
|
using mindspore::dataset::Path;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
|
|
||||||
using mindspore::dataset;
|
using TensorOperation = mindspore::dataset::TensorOperation;
|
||||||
|
|
||||||
using mindspore::LogStream;
|
using mindspore::LogStream;
|
||||||
using mindspore::MsLogLevel::DEBUG;
|
using mindspore::MsLogLevel::DEBUG;
|
||||||
|
@ -48,22 +49,21 @@ using mindspore::dataset::Status;
|
||||||
|
|
||||||
class MDToDApi {
|
class MDToDApi {
|
||||||
public:
|
public:
|
||||||
std::shared_ptr<Dataset> _ds;
|
std::shared_ptr<mindspore::dataset::AlbumOp> _iter;
|
||||||
std::shared_ptr<Iterator> _iter;
|
|
||||||
std::vector<std::shared_ptr<TensorOperation>> _augs;
|
std::vector<std::shared_ptr<TensorOperation>> _augs;
|
||||||
std::string _storage_folder;
|
std::string _storage_folder;
|
||||||
std::string _folder_path;
|
std::string _folder_path;
|
||||||
bool _hasBatch;
|
bool _hasBatch;
|
||||||
int64_t _file_id;
|
int64_t _file_id;
|
||||||
|
|
||||||
MDToDApi() : _ds(nullptr), _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) {
|
public:
|
||||||
MS_LOG(WARNING) << "MDToDAPI Call constructor";
|
MDToDApi() : _iter(nullptr), _augs({}), _storage_folder(""), _file_id(-1), _hasBatch(false) {
|
||||||
|
MS_LOG(WARNING) << "MDToDAPI Call constractor";
|
||||||
}
|
}
|
||||||
~MDToDApi() {
|
~MDToDApi() {
|
||||||
MS_LOG(WARNING) << "MDToDAPI Call destructor";
|
MS_LOG(WARNING) << "MDToDAPI Call destractor";
|
||||||
|
// derefernce dataset and iterator
|
||||||
_augs.clear();
|
_augs.clear();
|
||||||
_ds = nullptr;
|
|
||||||
_iter = nullptr;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -79,7 +79,9 @@ std::vector<std::string> MDToDBuffToVector(MDToDBuff_t StrBuff) {
|
||||||
return strVector;
|
return strVector;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" int MDToDApi_pathTest(const char *path) {
|
extern "C"
|
||||||
|
|
||||||
|
int MDToDApi_pathTest(const char* path) {
|
||||||
Path f(path);
|
Path f(path);
|
||||||
MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath();
|
MS_LOG(WARNING) << f.Exists() << f.IsDirectory() << f.ParentPath();
|
||||||
// Print out the first few items in the directory
|
// Print out the first few items in the directory
|
||||||
|
@ -114,36 +116,31 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) {
|
||||||
|
|
||||||
if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) {
|
if ((MDConf.ResizeSizeWH[0] != 0) && (MDConf.ResizeSizeWH[1] != 0)) {
|
||||||
std::vector<int> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2);
|
std::vector<int> Resize(MDConf.ResizeSizeWH, MDConf.ResizeSizeWH + 2);
|
||||||
std::shared_ptr<TensorOperation> resize_op = vision::Resize(Resize);
|
std::shared_ptr<TensorOperation> resize_op = mindspore::dataset::vision::Resize(Resize);
|
||||||
assert(resize_op != nullptr);
|
assert(resize_op != nullptr);
|
||||||
MS_LOG(WARNING) << "Push back resize";
|
MS_LOG(WARNING) << "Push back resize";
|
||||||
mapOperations.push_back(resize_op);
|
mapOperations.push_back(resize_op);
|
||||||
|
// hasBatch = true; Batch not currently supported inMInddata-Lite
|
||||||
}
|
}
|
||||||
if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) {
|
if ((MDConf.CropSizeWH[0] != 0) && (MDConf.CropSizeWH[1] != 0)) {
|
||||||
std::vector<int> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2);
|
std::vector<int> Crop(MDConf.CropSizeWH, MDConf.CropSizeWH + 2);
|
||||||
std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop(Crop);
|
std::shared_ptr<TensorOperation> center_crop_op = mindspore::dataset::vision::CenterCrop(Crop);
|
||||||
assert(center_crop_op != nullptr);
|
assert(center_crop_op != nullptr);
|
||||||
MS_LOG(WARNING) << "Push back crop";
|
MS_LOG(WARNING) << "Push back crop";
|
||||||
mapOperations.push_back(center_crop_op);
|
mapOperations.push_back(center_crop_op);
|
||||||
|
// hasBatch = true; Batch not currently supported inMInddata-Lite
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::shared_ptr<Dataset> ds = nullptr;
|
|
||||||
MS_LOG(INFO) << "Read id=" << MDConf.fileid << " (-1) for all";
|
MS_LOG(INFO) << "Read id=" << MDConf.fileid << " (-1) for all";
|
||||||
|
std::shared_ptr<mindspore::dataset::AlbumOp> iter = nullptr;
|
||||||
|
const std::set<std::string> exts = {};
|
||||||
if (MDConf.fileid > -1) {
|
if (MDConf.fileid > -1) {
|
||||||
// read specific image using SequentialSampler
|
// read specific image using SequentialSampler witn
|
||||||
ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(MDConf.fileid, 1L));
|
iter = std::make_shared<mindspore::dataset::AlbumOp>(folder_path, true, schema_file, exts, MDConf.fileid);
|
||||||
} else {
|
} else {
|
||||||
// Distributed sampler takes num_shards then shard_id
|
iter = std::make_shared<mindspore::dataset::AlbumOp>(folder_path, true, schema_file, exts);
|
||||||
ds = Album(folder_path, schema_file, column_names, true, SequentialSampler());
|
|
||||||
}
|
}
|
||||||
ds = ds->SetNumWorkers(1);
|
|
||||||
|
|
||||||
assert(ds != nullptr);
|
|
||||||
|
|
||||||
// Create a Repeat operation on ds
|
|
||||||
int32_t repeat_num = 1;
|
|
||||||
ds = ds->Repeat(repeat_num);
|
|
||||||
assert(ds != nullptr);
|
|
||||||
|
|
||||||
// Create objects for the tensor ops
|
// Create objects for the tensor ops
|
||||||
MS_LOG(INFO) << " Create pipline parameters";
|
MS_LOG(INFO) << " Create pipline parameters";
|
||||||
|
@ -154,16 +151,7 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) {
|
||||||
}
|
}
|
||||||
bool hasBatch = false;
|
bool hasBatch = false;
|
||||||
|
|
||||||
// Create an iterator over the result of the above dataset
|
|
||||||
// This will trigger the creation of the Execution Tree and launch it.
|
|
||||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
|
||||||
if (nullptr == iter) {
|
|
||||||
MS_LOG(ERROR) << "Iterator creation failed";
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
assert(iter != nullptr);
|
|
||||||
MDToDApi *pMDToDApi = new MDToDApi;
|
MDToDApi *pMDToDApi = new MDToDApi;
|
||||||
pMDToDApi->_ds = ds;
|
|
||||||
pMDToDApi->_iter = iter;
|
pMDToDApi->_iter = iter;
|
||||||
pMDToDApi->_augs = mapOperations;
|
pMDToDApi->_augs = mapOperations;
|
||||||
pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath);
|
pMDToDApi->_storage_folder = std::string(MDConf.pStoragePath);
|
||||||
|
@ -173,11 +161,11 @@ extern "C" MDToDApi *MDToDApi_createPipeLine(MDToDConf_t MDConf) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void MDBuffToVector(MDToDBuff_t MDBuff, std::vector<T> *vec) {
|
void MDBuffToVector(const MDToDBuff_t MDBuff, std::vector<T> *vec) {
|
||||||
vec.clear();
|
vec->clear();
|
||||||
if (MDBuff.DataSize > 0) {
|
if (MDBuff.DataSize > 0) {
|
||||||
int nofElements = MDBuff.DataSize / sizeof(T);
|
int nofElements = MDBuff.DataSize / sizeof(T);
|
||||||
*vec.assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements);
|
vec->assign(reinterpret_cast<T *>(MDBuff.Buff), reinterpret_cast<T *>(MDBuff.Buff) + nofElements);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -217,7 +205,7 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro
|
||||||
resBuff->TensorSize[0] = 1;
|
resBuff->TensorSize[0] = 1;
|
||||||
}
|
}
|
||||||
if (column->shape()[firstDim] > 0) {
|
if (column->shape()[firstDim] > 0) {
|
||||||
if (DataType::DE_STRING == column->type()) {
|
if (mindspore::dataset::DataType::DE_STRING == column->type()) {
|
||||||
std::string str;
|
std::string str;
|
||||||
for (int ix = 0; ix < column->shape()[firstDim]; ix++) {
|
for (int ix = 0; ix < column->shape()[firstDim]; ix++) {
|
||||||
std::string_view strView;
|
std::string_view strView;
|
||||||
|
@ -238,14 +226,14 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro
|
||||||
MS_LOG(ERROR) << "memcpy_s return: " << ret;
|
MS_LOG(ERROR) << "memcpy_s return: " << ret;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
DataHelper dh;
|
mindspore::dataset::DataHelper dh;
|
||||||
resBuff->DataSize =
|
resBuff->DataSize =
|
||||||
dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize);
|
dh.DumpData(column->GetBuffer(), column->SizeInBytes(), resBuff->Buff, resBuff->MaxBuffSize);
|
||||||
}
|
}
|
||||||
MS_LOG(INFO) << columnName << " " << resBuff->DataSize
|
MS_LOG(INFO) << columnName << " " << resBuff->DataSize
|
||||||
<< " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") ";
|
<< " bytesCopyed to buff (MaxBuffSize: " << resBuff->MaxBuffSize << ") ";
|
||||||
if (0 == resBuff->DataSize) {
|
if (0 == resBuff->DataSize) {
|
||||||
MS_LOG(ERROR) << "Copy Failed!!!! " << columnName << " Too large"
|
MS_LOG(ERROR) << "COPY FAIL!!!! " << columnName << " Too large"
|
||||||
<< "."; // memcpy failed
|
<< "."; // memcpy failed
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -259,7 +247,7 @@ void GetTensorToBuff(std::unordered_map<std::string, std::shared_ptr<Tensor>> ro
|
||||||
extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
|
extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
|
||||||
MS_LOG(INFO) << "Start GetNext";
|
MS_LOG(INFO) << "Start GetNext";
|
||||||
if (pMDToDApi == nullptr) {
|
if (pMDToDApi == nullptr) {
|
||||||
MS_LOG(ERROR) << "GetNext called with nullptr. Abort";
|
MS_LOG(ERROR) << "GetNext called with null ptr. abort";
|
||||||
assert(pMDToDApi != nullptr);
|
assert(pMDToDApi != nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -271,12 +259,13 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
|
||||||
// get next row for dataset
|
// get next row for dataset
|
||||||
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
|
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
|
||||||
if (pMDToDApi->_iter == nullptr) {
|
if (pMDToDApi->_iter == nullptr) {
|
||||||
MS_LOG(ERROR) << "GetNext called with no iterator. abort";
|
MS_LOG(ERROR) << "GetNext called with no iteratoe. abort";
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// create Execute functions, this replaces Map in Pipeline
|
// create Execute functions, this replaces Map in Pipeline
|
||||||
pMDToDApi->_iter->GetNextRow(&row);
|
|
||||||
if (row.size() != 0) {
|
bool ret = pMDToDApi->_iter->GetNextRow(&row);
|
||||||
|
if (row.size() != 0 && ret) {
|
||||||
if ((pMDToDApi->_augs).size() > 0) {
|
if ((pMDToDApi->_augs).size() > 0) {
|
||||||
// String and Tensors
|
// String and Tensors
|
||||||
GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff);
|
GetTensorToBuff(row, "image_filename", pMDToDApi->_hasBatch, &results->fileNameBuff);
|
||||||
|
@ -285,7 +274,7 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
|
||||||
for (int i = 0; i < (pMDToDApi->_augs).size(); i++) {
|
for (int i = 0; i < (pMDToDApi->_augs).size(); i++) {
|
||||||
// each Execute call will invoke a memcpy, this cannot really be optimized further
|
// each Execute call will invoke a memcpy, this cannot really be optimized further
|
||||||
// for this use case, std move is added for fail save.
|
// for this use case, std move is added for fail save.
|
||||||
row["image"] = Execute((pMDToDApi->_augs)[i])(std::move(row["image"]));
|
row["image"] = mindspore::dataset::Execute((pMDToDApi->_augs)[i])(std::move(row["image"]));
|
||||||
if (row["image"] == nullptr) {
|
if (row["image"] == nullptr) {
|
||||||
// nullptr means that the eager mode image processing failed, we fail in this case
|
// nullptr means that the eager mode image processing failed, we fail in this case
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -316,20 +305,18 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
|
||||||
|
|
||||||
extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) {
|
extern "C" int MDToDApi_Stop(MDToDApi *pMDToDApi) {
|
||||||
// Manually terminate the pipeline
|
// Manually terminate the pipeline
|
||||||
pMDToDApi->_iter->Stop();
|
|
||||||
MS_LOG(WARNING) << "pipline stoped";
|
MS_LOG(WARNING) << "pipline stoped";
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) {
|
extern "C" int MDToDApi_Destroy(MDToDApi *pMDToDApi) {
|
||||||
MS_LOG(WARNING) << "pipeline deleted start";
|
MS_LOG(WARNING) << "pipline deleted start";
|
||||||
pMDToDApi->_iter->Stop();
|
|
||||||
delete pMDToDApi;
|
delete pMDToDApi;
|
||||||
MS_LOG(WARNING) << "pipeline deleted end";
|
MS_LOG(WARNING) << "pipline deleted end";
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) {
|
int GetJsonFullFileName(const MDToDApi *pMDToDApi, std::string *filePath) {
|
||||||
int64_t file_id = pMDToDApi->_file_id;
|
int64_t file_id = pMDToDApi->_file_id;
|
||||||
if (file_id < 0) {
|
if (file_id < 0) {
|
||||||
MS_LOG(ERROR) << "Illigal file ID to update: " << file_id << ".";
|
MS_LOG(ERROR) << "Illigal file ID to update: " << file_id << ".";
|
||||||
|
@ -343,12 +330,12 @@ int GetJsonFullFileName(MDToDApi *pMDToDApi, std::string *filePath) {
|
||||||
extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings,
|
extern "C" int MDToDApi_UpdateEmbeding(MDToDApi *pMDToDApi, const char *column, float *emmbeddings,
|
||||||
size_t emmbeddingsSize) {
|
size_t emmbeddingsSize) {
|
||||||
auto columnName = std::string(column);
|
auto columnName = std::string(column);
|
||||||
MS_LOG(INFO) << "Start update " << columnName;
|
MS_LOG(INFO) << "Start Update " << columnName;
|
||||||
|
|
||||||
std::string converted = std::to_string(pMDToDApi->_file_id);
|
std::string converted = std::to_string(pMDToDApi->_file_id);
|
||||||
std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin";
|
std::string embedding_file_path = pMDToDApi->_storage_folder + "/" + converted + columnName + ".bin";
|
||||||
DataHelper dh;
|
mindspore::dataset::DataHelper dh;
|
||||||
MS_LOG(INFO) << "Try to save file " << embedding_file_path;
|
MS_LOG(INFO) << "Try to Save file " << embedding_file_path;
|
||||||
std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize);
|
std::vector<float> bin_content(emmbeddings, emmbeddings + emmbeddingsSize);
|
||||||
Status rc = dh.template WriteBinFile<float>(embedding_file_path, bin_content);
|
Status rc = dh.template WriteBinFile<float>(embedding_file_path, bin_content);
|
||||||
if (rc.IsError()) {
|
if (rc.IsError()) {
|
||||||
|
@ -379,8 +366,8 @@ extern "C" int MDToDApi_UpdateStringArray(MDToDApi *pMDToDApi, const char *colum
|
||||||
MS_LOG(ERROR) << "Failed to update " << columnName;
|
MS_LOG(ERROR) << "Failed to update " << columnName;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
MS_LOG(INFO) << "Start Update string array column: " << columnName << " in file " << file_path;
|
MS_LOG(INFO) << "Start Update string Array column: " << columnName << " in file " << file_path;
|
||||||
DataHelper dh;
|
mindspore::dataset::DataHelper dh;
|
||||||
std::vector<std::string> strVec;
|
std::vector<std::string> strVec;
|
||||||
if (MDbuff.DataSize > 0) {
|
if (MDbuff.DataSize > 0) {
|
||||||
const char *p = reinterpret_cast<char *>(MDbuff.Buff);
|
const char *p = reinterpret_cast<char *>(MDbuff.Buff);
|
||||||
|
@ -405,7 +392,7 @@ extern "C" int MDToDApi_UpdateFloatArray(MDToDApi *pMDToDApi, const char *column
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path;
|
MS_LOG(INFO) << "Start Update float Array column: " << columnName << " in file " << file_path;
|
||||||
DataHelper dh;
|
mindspore::dataset::DataHelper dh;
|
||||||
std::vector<float> vec;
|
std::vector<float> vec;
|
||||||
MDBuffToVector<float>(MDBuff, &vec);
|
MDBuffToVector<float>(MDBuff, &vec);
|
||||||
Status rc = dh.UpdateArray<float>(file_path, columnName, vec);
|
Status rc = dh.UpdateArray<float>(file_path, columnName, vec);
|
||||||
|
@ -423,7 +410,7 @@ extern "C" int MDToDApi_UpdateIsForTrain(MDToDApi *pMDToDApi, int32_t isForTrain
|
||||||
if (file_id < 0) return -1;
|
if (file_id < 0) return -1;
|
||||||
std::string converted = std::to_string(pMDToDApi->_file_id);
|
std::string converted = std::to_string(pMDToDApi->_file_id);
|
||||||
std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json";
|
std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json";
|
||||||
DataHelper dh;
|
mindspore::dataset::DataHelper dh;
|
||||||
MS_LOG(INFO) << "Updating file: " << file_path;
|
MS_LOG(INFO) << "Updating file: " << file_path;
|
||||||
Status rc = dh.UpdateValue<int32_t>(file_path, "_isForTrain", isForTrain, "");
|
Status rc = dh.UpdateValue<int32_t>(file_path, "_isForTrain", isForTrain, "");
|
||||||
if (rc.IsError()) {
|
if (rc.IsError()) {
|
||||||
|
@ -440,7 +427,7 @@ extern "C" int MDToDApi_UpdateNoOfFaces(MDToDApi *pMDToDApi, int32_t noOfFaces)
|
||||||
if (file_id < 0) return -1;
|
if (file_id < 0) return -1;
|
||||||
std::string converted = std::to_string(pMDToDApi->_file_id);
|
std::string converted = std::to_string(pMDToDApi->_file_id);
|
||||||
std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json";
|
std::string file_path = pMDToDApi->_folder_path + "/" + converted + ".json";
|
||||||
DataHelper dh;
|
mindspore::dataset::DataHelper dh;
|
||||||
MS_LOG(INFO) << "Updating file: " << file_path;
|
MS_LOG(INFO) << "Updating file: " << file_path;
|
||||||
Status rc = dh.UpdateValue<int32_t>(file_path, "_noOfFaces", noOfFaces, "");
|
Status rc = dh.UpdateValue<int32_t>(file_path, "_noOfFaces", noOfFaces, "");
|
||||||
if (rc.IsError()) {
|
if (rc.IsError()) {
|
||||||
|
|
|
@ -0,0 +1,470 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "album_op_android.h" //NOLINT
|
||||||
|
#include <fstream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include "minddata/dataset/core/tensor_shape.h"
|
||||||
|
#include "minddata/dataset/kernels/image/lite_image_utils.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
|
||||||
|
AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
|
||||||
|
const std::set<std::string> &exts)
|
||||||
|
: folder_path_(file_dir),
|
||||||
|
decode_(do_decode),
|
||||||
|
extensions_(exts),
|
||||||
|
schema_file_(schema_file),
|
||||||
|
row_cnt_(0),
|
||||||
|
buf_cnt_(0),
|
||||||
|
current_cnt_(0),
|
||||||
|
dirname_offset_(0),
|
||||||
|
sampler_(false),
|
||||||
|
sampler_index_(0) {
|
||||||
|
PrescanEntry();
|
||||||
|
}
|
||||||
|
|
||||||
|
AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
|
||||||
|
const std::set<std::string> &exts, uint32_t index)
|
||||||
|
: folder_path_(file_dir),
|
||||||
|
decode_(do_decode),
|
||||||
|
extensions_(exts),
|
||||||
|
schema_file_(schema_file),
|
||||||
|
row_cnt_(0),
|
||||||
|
buf_cnt_(0),
|
||||||
|
current_cnt_(0),
|
||||||
|
dirname_offset_(0),
|
||||||
|
sampler_(true),
|
||||||
|
sampler_index_(0) {
|
||||||
|
PrescanEntry();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Numeric-aware comparator used to order album files.
// Album file names encode a numeric id (e.g. "/123.json"), so sorting is done
// by that number rather than lexicographically. Returns true when "a" encodes
// a value strictly less than "b". A "." is guaranteed to be present because
// file extensions are validated before this comparator is invoked.
bool StrComp(const std::string &a, const std::string &b) {
  // Offset 1 skips the leading path separator; atoi stops parsing at the ".".
  const int64_t lhs = std::atoi(a.substr(1, a.find(".")).c_str());
  const int64_t rhs = std::atoi(b.substr(1, b.find(".")).c_str());
  return lhs < rhs;
}
|
||||||
|
|
||||||
|
// Single thread to go through the folder directory and gets all file names
|
||||||
|
// calculate numRows then return
|
||||||
|
Status AlbumOp::PrescanEntry() {
|
||||||
|
data_schema_ = std::make_unique<DataSchema>();
|
||||||
|
Path schema_file(schema_file_);
|
||||||
|
if (schema_file_ == "" || !schema_file.Exists()) {
|
||||||
|
RETURN_STATUS_UNEXPECTED("Invalid file, schema_file is invalid or not set: " + schema_file_);
|
||||||
|
} else {
|
||||||
|
MS_LOG(WARNING) << "Schema file provided: " << schema_file_ << ".";
|
||||||
|
data_schema_->LoadSchemaFile(schema_file_, columns_to_load_);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
|
||||||
|
column_name_id_map_[data_schema_->column(i).name()] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
Path folder(folder_path_);
|
||||||
|
dirname_offset_ = folder_path_.length();
|
||||||
|
std::shared_ptr<Path::DirIterator> dirItr = Path::DirIterator::OpenDirectory(&folder);
|
||||||
|
if (folder.Exists() == false || dirItr == nullptr) {
|
||||||
|
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_path_);
|
||||||
|
}
|
||||||
|
MS_LOG(WARNING) << "Album folder Path found: " << folder_path_ << ".";
|
||||||
|
|
||||||
|
while (dirItr->hasNext()) {
|
||||||
|
Path file = dirItr->next();
|
||||||
|
if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
|
||||||
|
(void)image_rows_.push_back(file.toString().substr(dirname_offset_));
|
||||||
|
} else {
|
||||||
|
MS_LOG(WARNING) << "Album operator unsupported file found: " << file.toString()
|
||||||
|
<< ", extension: " << file.Extension() << ".";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(image_rows_.begin(), image_rows_.end(), StrComp);
|
||||||
|
|
||||||
|
if (image_rows_.size() == 0) {
|
||||||
|
RETURN_STATUS_UNEXPECTED(
|
||||||
|
"Invalid data, no valid data matching the dataset API AlbumDataset. Please check file path or dataset API.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sampler_) {
|
||||||
|
if (sampler_index_ < 0 || sampler_index_ >= image_rows_.size()) {
|
||||||
|
RETURN_STATUS_UNEXPECTED("the sampler index was out of range");
|
||||||
|
}
|
||||||
|
std::vector<std::string> tmp;
|
||||||
|
tmp.emplace_back(image_rows_[sampler_index_]);
|
||||||
|
image_rows_.clear();
|
||||||
|
image_rows_ = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
// contains the main logic of pulling a IOBlock from IOBlockQueue, load a buffer and push the buffer to out_connector_
|
||||||
|
// IMPORTANT: 1 IOBlock produces 1 DataBuffer
|
||||||
|
bool AlbumOp::GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
|
||||||
|
if (map_row == nullptr) {
|
||||||
|
MS_LOG(WARNING) << "GetNextRow in AlbumOp: the point of map_row is nullptr";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current_cnt_ == image_rows_.size()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Status ret = LoadTensorRow(current_cnt_, image_rows_[current_cnt_], map_row);
|
||||||
|
if (ret.IsError()) {
|
||||||
|
MS_LOG(ERROR) << "GetNextRow in AlbumOp: " << ret.ToString() << "\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
current_cnt_++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only support JPEG/PNG/GIF/BMP
|
||||||
|
// Optimization: Could take in a tensor
|
||||||
|
// This function does not return status because we want to just skip bad input, not crash
|
||||||
|
bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
|
||||||
|
std::ifstream file_handle;
|
||||||
|
constexpr int read_num = 3;
|
||||||
|
*valid = false;
|
||||||
|
file_handle.open(file_name, std::ios::binary | std::ios::in);
|
||||||
|
if (!file_handle.is_open()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
unsigned char file_type[read_num];
|
||||||
|
(void)file_handle.read(reinterpret_cast<char *>(file_type), read_num);
|
||||||
|
|
||||||
|
if (file_handle.fail()) {
|
||||||
|
file_handle.close();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
file_handle.close();
|
||||||
|
if (file_type[0] == 0xff && file_type[1] == 0xd8 && file_type[2] == 0xff) {
|
||||||
|
// Normal JPEGs start with \xff\xd8\xff\xe0
|
||||||
|
// JPEG with EXIF stats with \xff\xd8\xff\xe1
|
||||||
|
// Use \xff\xd8\xff to cover both.
|
||||||
|
*valid = true;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load one image file into *tensor.
// Missing files, PNGs and undecodable inputs yield an empty tensor (best-effort,
// not an error); .bin files are loaded raw; JPEGs are decoded when decode_ is set,
// otherwise the raw file bytes are returned.
// \param[in] image_file_path Image file on disk
// \param[in] col_num Column num in schema (used for the empty-tensor type)
// \param[in,out] tensor Tensor to fill
// \return Status The error code returned
Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorPtr *tensor) {
  std::ifstream fs;
  fs.open(image_file_path, std::ios::binary | std::ios::in);
  if (fs.fail()) {
    MS_LOG(WARNING) << "File not found:" << image_file_path << ".";
    // If file doesn't exist, we don't flag this as error in input check, simply push back empty tensor
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }
  // Hack logic to replace png images with empty tensor
  Path file(image_file_path);
  std::set<std::string> png_ext = {".png", ".PNG"};
  if (png_ext.find(file.Extension()) != png_ext.end()) {
    // load empty tensor since image is not jpg
    MS_LOG(INFO) << "PNG!" << image_file_path << ".";
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }
  // treat bin files separately: load the raw bytes without any decoding
  std::set<std::string> bin_ext = {".bin", ".BIN"};
  if (bin_ext.find(file.Extension()) != bin_ext.end()) {
    MS_LOG(INFO) << "Bin file found" << image_file_path << ".";
    RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, tensor));
    return Status::OK();
  }

  // check that the file is an image before decoding
  bool valid = false;
  bool check_success = CheckImageType(image_file_path, &valid);
  if (!check_success || !valid) {
    RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
    return Status::OK();
  }
  // if it is a jpeg image, load and try to decode
  TensorPtr image;
  RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_file_path, &image));
  if (decode_ && valid) {
    Status rc = Decode(image, tensor);
    if (rc.IsError()) {
      // decode failure is tolerated: hand back an empty tensor instead
      RETURN_IF_NOT_OK(LoadEmptyTensor(col_num, tensor));
      return Status::OK();
    }
  } else {
    // fix: when decode_ is false the file was read into the local `image` and
    // then dropped, leaving *tensor unset — return the raw image bytes instead.
    *tensor = std::move(image);
  }
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Load a JSON array of strings into a 1-D string tensor.
// \param[in] json_obj Json array of strings
// \param[in] col_num Column num in schema (unused here; kept for a uniform loader signature)
// \param[in,out] tensor Tensor to fill
// \return Status The error code returned
Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  std::vector<std::string> data = json_obj.get<std::vector<std::string>>();

  // fix: demoted from WARNING to INFO — finding a label is a routine event and
  // every sibling loader (LoadStringTensor, LoadIntTensor, ...) logs at INFO.
  MS_LOG(INFO) << "String array label found: " << data << ".";
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Load a single JSON string value into a scalar string tensor.
// \param[in] json_obj Json object holding one string
// \param[in] col_num Column num in schema (unused here; kept for a uniform loader signature)
// \param[in,out] tensor Tensor to fill
// \return Status The error code returned
Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  std::string data = json_obj;

  MS_LOG(INFO) << "String label found: " << data << ".";
  // fix: removed unused local `TensorPtr label;` — the scalar is created
  // directly into the caller-provided *tensor.
  RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(data, tensor));
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Load a JSON array of integers into a 1-D tensor of the column's declared
// integer type (int64 or int32); any other column type is an error.
// \param[in] json_obj Json array containing the multi-dimensional label
// \param[in] col_num Column num in schema — selects the element type
// \param[in,out] tensor Tensor to fill
// \return Status The error code returned
Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
    // idiom: nlohmann::json::get<std::vector<int64_t>>() converts the whole
    // array in one call, replacing the manual items()/std::transform loop.
    std::vector<int64_t> data = json_obj.get<std::vector<int64_t>>();
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
  } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) {
    std::vector<int32_t> data = json_obj.get<std::vector<int32_t>>();
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
  } else {
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " +
                             data_schema_->column(col_num).type().ToString());
  }
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Load a JSON array of floating-point values into a 1-D tensor of the column's
// declared float type (float64 or float32); any other column type is an error.
// \param[in] json_obj Json array containing the data
// \param[in] col_num Column num in schema — selects the element type
// \param[in,out] tensor Tensor to fill
// \return Status The error code returned
Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
  if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
    // idiom: nlohmann::json::get<std::vector<double>>() converts the whole
    // array in one call, replacing the manual items()/std::transform loop.
    std::vector<double> data = json_obj.get<std::vector<double>>();
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
  } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) {
    std::vector<float> data = json_obj.get<std::vector<float>>();
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor));
  } else {
    RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " +
                             data_schema_->column(col_num).type().ToString());
  }
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Produce the "id" column for a row: either the relative file path itself
// (string columns) or the numeric id parsed out of the file name.
// \param[in] file Relative file path, e.g. "/123.json"
// \param[in] col_num Column num in schema — selects string vs numeric id
// \param[in,out] tensor Tensor to fill
// \return Status The error code returned
Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor) {
  if (data_schema_->column(col_num).type() == DataType::DE_STRING) {
    RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, tensor));
    return Status::OK();
  }

  // Numeric id: offset 1 skips the leading path separator, and atoi stops
  // parsing at the "." before the extension.
  const int64_t image_id = std::atoi(file.substr(1, file.find(".")).c_str());
  RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(image_id, tensor));
  MS_LOG(INFO) << "File ID " << image_id << ".";
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Create a zero-length tensor of the column's declared type, used as a
// placeholder when a value is missing or cannot be loaded.
// \param[in] col_num Column num in schema — selects the element type
// \param[in,out] tensor Tensor to fill
// \return Status The error code returned
Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) {
  // Shape {0}: an empty 1-D tensor, typed so downstream consumers can still
  // inspect the column's dtype.
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), tensor));
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Loads a tensor with float value, issue with float64, we don't have reverse look up to the type
|
||||||
|
// So we actually have to check what type we want to fill the tensor with.
|
||||||
|
// Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to
|
||||||
|
// only be float32, seems like a weird limitation to impose
|
||||||
|
Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
|
||||||
|
// TensorPtr float_tensor;
|
||||||
|
if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
|
||||||
|
double data = json_obj;
|
||||||
|
MS_LOG(INFO) << "double found: " << json_obj << ".";
|
||||||
|
RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, tensor));
|
||||||
|
} else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) {
|
||||||
|
float data = json_obj;
|
||||||
|
RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data, tensor));
|
||||||
|
MS_LOG(INFO) << "float found: " << json_obj << ".";
|
||||||
|
}
|
||||||
|
// row->push_back(std::move(float_tensor));
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loads a tensor with int value, we have to cast the value to type specified in the schema.
|
||||||
|
Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
|
||||||
|
// TensorPtr int_tensor;
|
||||||
|
if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
|
||||||
|
int64_t data = json_obj;
|
||||||
|
MS_LOG(INFO) << "int64 found: " << json_obj << ".";
|
||||||
|
RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, tensor));
|
||||||
|
} else if (data_schema_->column(col_num).type() == DataType::DE_INT32) {
|
||||||
|
int32_t data = json_obj;
|
||||||
|
RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, tensor));
|
||||||
|
MS_LOG(INFO) << "int32 found: " << json_obj << ".";
|
||||||
|
}
|
||||||
|
// row->push_back(std::move(int_tensor));
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load 1 TensorRow (image, label) from one row's JSON file into *map_row.
// Opens folder_path_ + file, parses each line as JSON, and for every column in
// the schema dispatches to the matching Load*Tensor helper based on the JSON
// value shape (string/array) and the column's declared DataType. Missing keys
// get an empty placeholder tensor; unsupported value/type combinations are
// logged and skipped. A JSON parse failure aborts the whole row with an error.
// possible optimization: the helper functions of LoadTensorRow could take a
// reference to a column descriptor; the current design favors readability over
// minor performance gains like removing duplicated checks.
// \param[in] row_id id for this tensor row (currently unused in the body)
// \param[in] file Json file location, relative to folder_path_
// \param[in,out] map_row column name -> tensor for this row
// \return Status The error code returned
Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
                              std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
  MS_LOG(INFO) << "Image row file: " << file << ".";

  std::ifstream file_handle(folder_path_ + file);
  if (!file_handle.is_open()) {
    RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + folder_path_ + file);
  }
  std::string line;
  while (getline(file_handle, line)) {
    try {
      nlohmann::json js = nlohmann::json::parse(line);
      MS_LOG(INFO) << "This Line: " << line << ".";

      // Iterate over all column descriptors in the schema and check each key.
      int32_t columns = data_schema_->NumColumns();

      // Dispatch chain below relies on each handled case ending in `continue`;
      // the final `else` therefore only catches unsupported combinations.
      for (int32_t i = 0; i < columns; i++) {
        // "id" is internal — special case loaded from the file name, not the JSON.
        if (data_schema_->column(i).name() == "id") {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // Key absent from this JSON line: insert an empty placeholder tensor.
        if (js.find(data_schema_->column(i).name()) == js.end()) {
          MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        nlohmann::json column_value = js.at(data_schema_->column(i).name());
        MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
        bool is_array = column_value.is_array();
        // JSON string + string column -> single string label
        if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // JSON array + string column -> string array label
        if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // JSON string + non-string column -> the string is an image file path
        if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
          std::string image_file_path = column_value;
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // scalar float value
        if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
                          data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // float array
        if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
                         data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // scalar int value
        if (!is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
                          data_schema_->column(i).type() == DataType::DE_INT32)) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        }
        // int array; the trailing else (reached only when every case above
        // fell through) logs and skips unsupported value/type combinations
        if (is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
                         data_schema_->column(i).type() == DataType::DE_INT32)) {
          TensorPtr tensor;
          RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor));
          (*map_row)[data_schema_->column(i).name()] = tensor;
          continue;
        } else {
          MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported.";
          continue;
        }
      }
    } catch (const std::exception &err) {
      file_handle.close();
      RETURN_STATUS_UNEXPECTED("Invalid file, failed to parse json file: " + folder_path_ + file);
    }
  }
  file_handle.close();
  return Status::OK();
}
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,173 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_
|
||||||
|
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_
|
||||||
|
|
||||||
|
#include <deque>
|
||||||
|
#include <memory>
|
||||||
|
#include <queue>
|
||||||
|
#include <string>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include "minddata/dataset/core/tensor.h"
|
||||||
|
#include "minddata/dataset/engine/data_buffer.h"
|
||||||
|
#include "minddata/dataset/engine/data_schema.h"
|
||||||
|
#include "minddata/dataset/util/path.h"
|
||||||
|
#include "minddata/dataset/util/queue.h"
|
||||||
|
#include "minddata/dataset/util/status.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
// Forward declares
template <typename T>
class Queue;

// Define row information as a list of file objects to read.
// NOTE(review): this alias is not referenced by the AlbumOp declaration below —
// confirm it is consumed elsewhere before relying on it (or removing it).
using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>;
||||||
|
/// \class AlbumOp
/// \brief Lightweight, pull-based reader for an "album" folder: a directory of
///        image files plus per-row JSON files described by a schema. Used by
///        the JNI wrapper for on-device (lite) prediction.
class AlbumOp {
 public:
  /// \brief Constructor (iterate over the whole folder)
  /// \param[in] file_dir - directory of Album
  /// \param[in] do_decode - decode image files
  /// \param[in] schema_file - schema file
  /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
  AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
          const std::set<std::string> &exts);

  /// \brief Constructor (sample a single file)
  /// \param[in] file_dir - directory of Album
  /// \param[in] do_decode - decode image files
  /// \param[in] schema_file - schema file
  /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
  /// \param[in] index - the specific file index
  AlbumOp(const std::string &file_dir, bool do_decode, const std::string &schema_file,
          const std::set<std::string> &exts, uint32_t index);

  /// \brief Destructor.
  ~AlbumOp() = default;

  /// \brief Initialize AlbumOp related var, calls the function to walk all files
  /// \return - The error code returned
  Status PrescanEntry();

  /// \brief Load the next row into *map_row and advance the internal cursor
  /// \return bool - false when the output pointer is null, the dataset is
  ///         exhausted, or the row failed to load; true on success
  bool GetNextRow(std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);

  /// \brief Check if image is valid. Only JPEG is accepted by the magic-byte check
  ///        (the .cc only sets *valid for the \xff\xd8\xff signature)
  /// This function could be optimized to return the tensor to reduce open/closing files
  /// \return bool - if file is bad then return false
  bool CheckImageType(const std::string &file_name, bool *valid);

  // Op name getter
  // @return Name of the current Op
  std::string Name() const { return "AlbumOp"; }

 private:
  /// \brief Load image to tensor
  /// \param[in] image_file Image name of file
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load vector of ints to tensor
  /// \param[in] json_obj Json object containing multi-dimensional label
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load vector of floats to tensor
  /// \param[in] json_obj Json object containing array data
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load string array into a tensor
  /// \param[in] json_obj Json object containing string tensor
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load string into a tensor
  /// \param[in] json_obj Json object containing string tensor
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load float value to tensor
  /// \param[in] json_obj Json object containing float
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load int value to tensor
  /// \param[in] json_obj Json object containing int
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load empty tensor as a placeholder for a missing value
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor);

  /// \brief Load id from file name to tensor
  /// \param[in] file The file name to get ID from
  /// \param[in] col_num Column num in schema
  /// \param[in,out] tensor Tensor to push to
  /// \return Status The error code returned
  Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor);

  /// \brief Load a tensor row according to a json file
  /// \param[in] row_id id for this tensor row
  /// \param[in] file Json file location
  /// \param[in,out] map_row Json content stored into a column-name -> tensor map
  /// \return Status The error code returned
  Status LoadTensorRow(row_id_type row_id, const std::string &file,
                       std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row);

  std::string folder_path_;  // directory of image folder
  bool decode_;              // whether LoadImageTensor decodes JPEGs
  std::vector<std::string> columns_to_load_;
  std::set<std::string> extensions_;  // extensions allowed
  std::unique_ptr<DataSchema> data_schema_;
  std::string schema_file_;
  int64_t row_cnt_;
  int64_t current_cnt_;  // cursor for GetNextRow
  int64_t buf_cnt_;
  int64_t dirname_offset_;  // length of folder_path_, used to store relative paths
  bool sampler_;            // true: only the file at sampler_index_ is kept
  int64_t sampler_index_;
  std::vector<std::string> image_rows_;  // relative file paths, numerically sorted
  std::unordered_map<std::string, int32_t> column_name_id_map_;
};
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_ANDROID_OP_H_
|
Loading…
Reference in New Issue