forked from OSSInnovation/mindspore
commit
65c6e54e2e
|
@ -17,6 +17,10 @@ else()
|
|||
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
|
||||
endif()
|
||||
|
||||
if (ENABLE_PYTHON)
|
||||
add_compile_definitions(ENABLE_PYTHON)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")
|
||||
|
|
11
build.sh
11
build.sh
|
@ -25,7 +25,7 @@ usage()
|
|||
echo "Usage:"
|
||||
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
|
||||
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
|
||||
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
|
||||
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " -d Debug mode"
|
||||
|
@ -56,6 +56,7 @@ usage()
|
|||
echo " -s Enable serving module, default off"
|
||||
echo " -B Enable debugger, default off"
|
||||
echo " -E Enable IBVERBS for parameter server, default off"
|
||||
echo " -l Compile with python dependency, default on"
|
||||
}
|
||||
|
||||
# check value of input is 'on' or 'off'
|
||||
|
@ -98,9 +99,10 @@ checkopts()
|
|||
ENABLE_SERVING="off"
|
||||
ENABLE_DEBUGGER="off"
|
||||
ENABLE_IBVERBS="off"
|
||||
ENABLE_PYTHON="on"
|
||||
|
||||
# Process the options
|
||||
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
|
||||
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt
|
||||
do
|
||||
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
|
||||
case "${opt}" in
|
||||
|
@ -151,6 +153,10 @@ checkopts()
|
|||
check_on_off $OPTARG p
|
||||
ENABLE_PROFILE="$OPTARG"
|
||||
;;
|
||||
l)
|
||||
check_on_off $OPTARG l
|
||||
ENABLE_PYTHON="$OPTARG"
|
||||
;;
|
||||
i)
|
||||
INC_BUILD="on"
|
||||
;;
|
||||
|
@ -316,6 +322,7 @@ build_mindspore()
|
|||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
|
||||
fi
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
|
||||
if [[ "X$ENABLE_MPI" = "Xon" ]]; then
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
|
||||
fi
|
||||
|
|
|
@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
|
|||
option(ENABLE_AKG "enable akg" OFF)
|
||||
option(ENABLE_DEBUGGER "enable debugger" OFF)
|
||||
option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
|
||||
option(ENABLE_PYTHON "Enable python" ON)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
if (WIN32)
|
||||
|
|
|
@ -39,6 +39,7 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform)
|
|||
include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include)
|
||||
include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/dataset/include)
|
||||
######################################################################
|
||||
|
||||
####################### Flags ########################################
|
||||
|
@ -67,7 +68,10 @@ add_dependencies(engine-gnn core)
|
|||
add_dependencies(engine core)
|
||||
add_dependencies(text core)
|
||||
add_dependencies(text-kernels core)
|
||||
add_dependencies(APItoPython core)
|
||||
add_dependencies(cpp-API core)
|
||||
if (ENABLE_PYTHON)
|
||||
add_dependencies(APItoPython core)
|
||||
endif()
|
||||
if (ENABLE_TDTQUE)
|
||||
add_dependencies(engine-tdt core)
|
||||
endif ()
|
||||
|
@ -78,7 +82,7 @@ set(submodules
|
|||
$<TARGET_OBJECTS:kernels>
|
||||
$<TARGET_OBJECTS:kernels-image>
|
||||
$<TARGET_OBJECTS:kernels-data>
|
||||
$<TARGET_OBJECTS:APItoPython>
|
||||
$<TARGET_OBJECTS:cpp-API>
|
||||
$<TARGET_OBJECTS:engine-datasetops-source>
|
||||
$<TARGET_OBJECTS:engine-datasetops-source-sampler>
|
||||
$<TARGET_OBJECTS:engine-gnn>
|
||||
|
@ -90,6 +94,12 @@ set(submodules
|
|||
$<TARGET_OBJECTS:text-kernels>
|
||||
)
|
||||
|
||||
if (ENABLE_PYTHON)
|
||||
set(submodules
|
||||
${submodules}
|
||||
$<TARGET_OBJECTS:APItoPython>)
|
||||
endif()
|
||||
|
||||
if (ENABLE_TDTQUE)
|
||||
add_library(_c_dataengine SHARED ${submodules} $<TARGET_OBJECTS:engine-tdt>)
|
||||
else ()
|
||||
|
|
|
@ -1,7 +1,16 @@
|
|||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
add_library(APItoPython OBJECT
|
||||
de_pipeline.cc
|
||||
python_bindings.cc
|
||||
if (ENABLE_PYTHON)
|
||||
add_library(APItoPython OBJECT
|
||||
de_pipeline.cc
|
||||
python_bindings.cc
|
||||
)
|
||||
target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
add_library(cpp-API OBJECT
|
||||
datasets.cc
|
||||
iterator.cc
|
||||
transforms.cc
|
||||
samplers.cc
|
||||
)
|
||||
target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS})
|
||||
|
|
|
@ -0,0 +1,446 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
|
||||
#include "dataset/include/datasets.h"
|
||||
#include "dataset/include/transforms.h"
|
||||
#include "dataset/include/samplers.h"
|
||||
#include "dataset/engine/dataset_iterator.h"
|
||||
#include "dataset/engine/datasetops/source/image_folder_op.h"
|
||||
#include "dataset/engine/datasetops/source/mnist_op.h"
|
||||
#include "dataset/engine/datasetops/source/cifar_op.h"
|
||||
#include "dataset/engine/datasetops/batch_op.h"
|
||||
#include "dataset/engine/datasetops/map_op.h"
|
||||
#include "dataset/engine/datasetops/repeat_op.h"
|
||||
#include "dataset/engine/datasetops/shuffle_op.h"
|
||||
#include "dataset/engine/datasetops/project_op.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/sampler.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/random_sampler.h"
|
||||
|
||||
#include "dataset/core/config_manager.h"
|
||||
#include "dataset/util/random.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace api {
|
||||
|
||||
// Evaluates the Status-producing expression `_s` exactly once and makes the
// enclosing function return nullptr when that status reports an error.
#define RETURN_NULL_IF_ERROR(_s)                                  \
  do {                                                            \
    if (Status return_null_rc_ = (_s); return_null_rc_.IsError()) { \
      return nullptr;                                             \
    }                                                             \
  } while (false)
|
||||
|
||||
// Function to create the iterator, which will build and launch the execution tree.
|
||||
std::shared_ptr<Iterator> Dataset::CreateIterator() {
|
||||
std::shared_ptr<Iterator> iter;
|
||||
try {
|
||||
iter = std::make_shared<Iterator>();
|
||||
Status rc = iter->BuildAndLaunchTree(shared_from_this());
|
||||
if (rc.IsError()) {
|
||||
MS_LOG(ERROR) << "CreateIterator failed.";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return iter;
|
||||
} catch (const std::exception &err) {
|
||||
MS_LOG(ERROR) << "CreateIterator: Iterator exception caught: " << err.what();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return iter;
|
||||
}
|
||||
|
||||
// Constructor
|
||||
Dataset::Dataset() {
|
||||
// Fetch some default value from config manager
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
num_workers_ = cfg->num_parallel_workers();
|
||||
rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
connector_que_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
// Function to create a ImageFolderDataset.
|
||||
std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool decode,
|
||||
std::shared_ptr<SamplerObj> sampler, std::set<std::string> extensions,
|
||||
std::map<std::string, int32_t> class_indexing) {
|
||||
// This arg is exist in ImageFolderOp, but not externalized (in Python API). The default value is false.
|
||||
bool recursive = false;
|
||||
|
||||
// Create logical representation of ImageFolderDataset.
|
||||
auto ds = std::make_shared<ImageFolderDataset>(dataset_dir, decode, sampler, recursive, extensions, class_indexing);
|
||||
|
||||
// Call derived class validation method.
|
||||
return ds->ValidateParams() ? ds : nullptr;
|
||||
}
|
||||
|
||||
// Function to create a MnistDataset.
|
||||
std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler) {
|
||||
auto ds = std::make_shared<MnistDataset>(dataset_dir, sampler);
|
||||
|
||||
// Call derived class validation method.
|
||||
return ds->ValidateParams() ? ds : nullptr;
|
||||
}
|
||||
|
||||
// Function to create a Cifar10Dataset.
|
||||
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples,
|
||||
std::shared_ptr<SamplerObj> sampler) {
|
||||
auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, num_samples, sampler);
|
||||
|
||||
// Call derived class validation method.
|
||||
return ds->ValidateParams() ? ds : nullptr;
|
||||
}
|
||||
|
||||
// Function to create a Batch dataset
|
||||
std::shared_ptr<BatchDataset> Dataset::Batch(int32_t batch_size, bool drop_remainder) {
|
||||
// Default values
|
||||
std::vector<std::string> cols_to_map = {};
|
||||
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map;
|
||||
bool pad = false;
|
||||
auto ds = std::make_shared<BatchDataset>(batch_size, drop_remainder, pad, cols_to_map, pad_map);
|
||||
|
||||
if (!ds->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ds->children.push_back(shared_from_this());
|
||||
|
||||
return ds;
|
||||
}
|
||||
|
||||
// Function to create Repeat dataset.
|
||||
std::shared_ptr<Dataset> Dataset::Repeat(int32_t count) {
|
||||
// Workaround for repeat == 1, do not inject repeat.
|
||||
if (count == 1) {
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
auto ds = std::make_shared<RepeatDataset>(count);
|
||||
|
||||
if (!ds->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ds->children.push_back(shared_from_this());
|
||||
|
||||
return ds;
|
||||
}
|
||||
|
||||
// Function to create a Map dataset.
|
||||
std::shared_ptr<MapDataset> Dataset::Map(std::vector<std::shared_ptr<TensorOperation>> operations,
|
||||
std::vector<std::string> input_columns,
|
||||
std::vector<std::string> output_columns,
|
||||
const std::vector<std::string> &project_columns) {
|
||||
auto ds = std::make_shared<MapDataset>(operations, input_columns, output_columns, project_columns);
|
||||
|
||||
if (!ds->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ds->children.push_back(shared_from_this());
|
||||
|
||||
return ds;
|
||||
}
|
||||
|
||||
// Function to create a ShuffleOp
|
||||
std::shared_ptr<ShuffleDataset> Dataset::Shuffle(int32_t shuffle_size) {
|
||||
// Pass in reshuffle_each_epoch with true
|
||||
auto ds = std::make_shared<ShuffleDataset>(shuffle_size, true);
|
||||
|
||||
if (!ds->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ds->children.push_back(shared_from_this());
|
||||
|
||||
return ds;
|
||||
}
|
||||
|
||||
// Function to create a ProjectDataset.
|
||||
std::shared_ptr<ProjectDataset> Dataset::Project(const std::vector<std::string> &columns) {
|
||||
auto ds = std::make_shared<ProjectDataset>(columns);
|
||||
// Call derived class validation method.
|
||||
if (!ds->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ds->children.push_back(shared_from_this());
|
||||
|
||||
return ds;
|
||||
}
|
||||
|
||||
// Helper function to create default RandomSampler.
|
||||
std::shared_ptr<SamplerObj> CreateDefaultSampler() {
|
||||
int32_t num_samples = 0; // 0 means to sample all ids.
|
||||
bool replacement = false;
|
||||
return std::make_shared<RandomSamplerObj>(replacement, num_samples);
|
||||
}
|
||||
|
||||
/* ####################################### Derived Dataset classes ################################# */
|
||||
|
||||
// Constructor for ImageFolderDataset: stores the arguments for later use by ValidateParams() and Build().
// The by-value parameters are sinks, so they are moved into the members instead of being copied a second time.
ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
                                       bool recursive, std::set<std::string> extensions,
                                       std::map<std::string, int32_t> class_indexing)
    : dataset_dir_(std::move(dataset_dir)),
      decode_(decode),
      sampler_(std::move(sampler)),
      recursive_(recursive),
      class_indexing_(std::move(class_indexing)),
      exts_(std::move(extensions)) {}
|
||||
|
||||
bool ImageFolderDataset::ValidateParams() {
|
||||
if (dataset_dir_.empty()) {
|
||||
MS_LOG(ERROR) << "No dataset path is specified.";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ImageFolderDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
// Do internal Schema generation.
|
||||
// This arg is exist in ImageFolderOp, but not externalized (in Python API).
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
TensorShape scalar = TensorShape::CreateScalar();
|
||||
RETURN_NULL_IF_ERROR(
|
||||
schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
||||
RETURN_NULL_IF_ERROR(
|
||||
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)));
|
||||
node_ops.push_back(std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
|
||||
recursive_, decode_, exts_, class_indexing_, std::move(schema),
|
||||
std::move(sampler_->Build())));
|
||||
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
|
||||
}
|
||||
|
||||
// Constructor for MnistDataset: stores the dataset directory and the (possibly null) sampler.
// Both parameters are by-value sinks, so they are moved into the members rather than copied again.
MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler)
    : dataset_dir_(std::move(dataset_dir)), sampler_(std::move(sampler)) {}
|
||||
|
||||
bool MnistDataset::ValidateParams() {
|
||||
if (dataset_dir_.empty()) {
|
||||
MS_LOG(ERROR) << "No dataset path is specified.";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> MnistDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
// Do internal Schema generation.
|
||||
auto schema = std::make_unique<DataSchema>();
|
||||
RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
|
||||
TensorShape scalar = TensorShape::CreateScalar();
|
||||
RETURN_NULL_IF_ERROR(
|
||||
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar)));
|
||||
|
||||
node_ops.push_back(std::make_shared<MnistOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
|
||||
std::move(schema), std::move(sampler_->Build())));
|
||||
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
|
||||
}
|
||||
|
||||
// Constructor for BatchDataset: stores the batching parameters for ValidateParams() and Build().
// The container parameters are by-value sinks, so they are moved into the members rather than copied again.
BatchDataset::BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map,
                           std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map)
    : batch_size_(batch_size),
      drop_remainder_(drop_remainder),
      pad_(pad),
      cols_to_map_(std::move(cols_to_map)),
      pad_map_(std::move(pad_map)) {}
|
||||
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> BatchDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
py::function noop;
|
||||
node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
|
||||
cols_to_map_, noop, noop, pad_map_));
|
||||
#else
|
||||
node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
|
||||
cols_to_map_, pad_map_));
|
||||
#endif
|
||||
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
|
||||
}
|
||||
|
||||
bool BatchDataset::ValidateParams() {
|
||||
if (batch_size_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Constructor for RepeatDataset: stores the repeat count.
// NOTE(review): Dataset::Repeat() passes an int32_t here while this parameter is uint32_t, so a negative
// count is converted to a large unsigned value — confirm that this round-trip is intended.
RepeatDataset::RepeatDataset(uint32_t count) : repeat_count_(count) {}
|
||||
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> RepeatDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
node_ops.push_back(std::make_shared<RepeatOp>(repeat_count_));
|
||||
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
|
||||
}
|
||||
|
||||
bool RepeatDataset::ValidateParams() {
|
||||
if (repeat_count_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
// Constructor for MapDataset: stores the tensor operations and column lists for ValidateParams() and Build().
// The by-value parameters are sinks, so they are moved into the members rather than copied again;
// project_columns is taken by const reference and therefore copied.
MapDataset::MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns,
                       std::vector<std::string> output_columns, const std::vector<std::string> &project_columns)
    : operations_(std::move(operations)),
      input_columns_(std::move(input_columns)),
      output_columns_(std::move(output_columns)),
      project_columns_(project_columns) {}
|
||||
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> MapDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// Currently default is true, and this is not exposed to user.
|
||||
bool perf_mode = true;
|
||||
|
||||
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
|
||||
|
||||
// Build tensorOp from tensorOperation vector
|
||||
// This is to ensure each iterator hold its own copy of the tensorOp objects.
|
||||
(void)std::transform(
|
||||
operations_.begin(), operations_.end(), std::back_inserter(tensor_ops),
|
||||
[](std::shared_ptr<TensorOperation> operation) -> std::shared_ptr<TensorOp> { return operation->Build(); });
|
||||
|
||||
// This parameter will be removed with next rebase
|
||||
std::vector<std::string> col_orders;
|
||||
auto map_op =
|
||||
std::make_shared<MapOp>(input_columns_, output_columns_, tensor_ops, num_workers_, connector_que_size_, perf_mode);
|
||||
if (!project_columns_.empty()) {
|
||||
auto project_op = std::make_shared<ProjectOp>(project_columns_);
|
||||
node_ops.push_back(project_op);
|
||||
}
|
||||
|
||||
node_ops.push_back(map_op);
|
||||
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
|
||||
}
|
||||
|
||||
bool MapDataset::ValidateParams() {
|
||||
if (operations_.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Constructor for ShuffleDataset
|
||||
ShuffleDataset::ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch)
|
||||
: shuffle_size_(shuffle_size), shuffle_seed_(GetSeed()), reset_every_epoch_(reset_every_epoch) {}
|
||||
|
||||
// Function to build the ShuffleOp
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ShuffleDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
node_ops.push_back(std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_,
|
||||
rows_per_buffer_));
|
||||
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
|
||||
}
|
||||
|
||||
// Function to validate the parameters for ShuffleDataset
|
||||
bool ShuffleDataset::ValidateParams() {
|
||||
if (shuffle_size_ <= 1) {
|
||||
MS_LOG(ERROR) << "ShuffleDataset: Invalid input, shuffle_size: " << shuffle_size_;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Constructor for Cifar10Dataset
|
||||
Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler)
|
||||
: dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(sampler) {}
|
||||
|
||||
bool Cifar10Dataset::ValidateParams() {
|
||||
if (dataset_dir_.empty()) {
|
||||
MS_LOG(ERROR) << "No dataset path is specified.";
|
||||
return false;
|
||||
}
|
||||
if (num_samples_ < 0) {
|
||||
MS_LOG(ERROR) << "Number of samples cannot be negative";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Function to build CifarOp
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Cifar10Dataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
|
||||
if (sampler_ == nullptr) {
|
||||
sampler_ = CreateDefaultSampler();
|
||||
}
|
||||
|
||||
// Do internal Schema generation.
|
||||
auto schema = std::make_unique<DataSchema>();
|
||||
RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
|
||||
TensorShape scalar = TensorShape::CreateScalar();
|
||||
RETURN_NULL_IF_ERROR(
|
||||
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar)));
|
||||
|
||||
node_ops.push_back(std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, num_workers_, rows_per_buffer_,
|
||||
dataset_dir_, connector_que_size_, std::move(schema),
|
||||
std::move(sampler_->Build())));
|
||||
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
|
||||
}
|
||||
|
||||
// Constructor for ProjectDataset
|
||||
// Stores a copy of the column list; Build() later hands it to ProjectOp.
ProjectDataset::ProjectDataset(const std::vector<std::string> &columns) : columns_(columns) {}
|
||||
|
||||
bool ProjectDataset::ValidateParams() {
|
||||
if (columns_.empty()) {
|
||||
MS_LOG(ERROR) << "No columns are specified.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ProjectDataset::Build() {
|
||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||
|
||||
node_ops.push_back(std::make_shared<ProjectOp>(columns_));
|
||||
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
|
||||
}
|
||||
|
||||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,101 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "dataset/include/iterator.h"
|
||||
#include "dataset/core/client.h"
|
||||
#include "dataset/include/datasets.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace api {
|
||||
|
||||
// Get the next row from the data pipeline.
|
||||
void Iterator::GetNextRow(TensorMap *row) {
|
||||
Status rc = iterator_->GetNextAsMap(row);
|
||||
if (rc.IsError()) {
|
||||
MS_LOG(ERROR) << "GetNextRow: Failed to get next row.";
|
||||
row->clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Shut down the data pipeline.
|
||||
void Iterator::Stop() {
|
||||
// Releasing the iterator_ unique_ptre. This should trigger the destructor of iterator_.
|
||||
iterator_.reset();
|
||||
|
||||
// Release ownership of tree_ shared pointer. This will decrement the ref count.
|
||||
tree_.reset();
|
||||
}
|
||||
|
||||
// Function to build and launch the execution tree.
|
||||
Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds) {
|
||||
// One time init
|
||||
Status rc;
|
||||
rc = GlobalInit();
|
||||
RETURN_IF_NOT_OK(rc);
|
||||
|
||||
// Instantiate the execution tree
|
||||
tree_ = std::make_shared<ExecutionTree>();
|
||||
|
||||
// Iterative BFS converting Dataset tree into runtime Execution tree.
|
||||
std::queue<std::pair<std::shared_ptr<Dataset>, std::shared_ptr<DatasetOp>>> q;
|
||||
|
||||
if (ds != nullptr) {
|
||||
// Convert the current root node.
|
||||
auto root_op = ds->Build()->front();
|
||||
RETURN_UNEXPECTED_IF_NULL(root_op);
|
||||
|
||||
RETURN_IF_NOT_OK(tree_->AssociateNode(root_op));
|
||||
|
||||
q.push(std::make_pair(ds, root_op));
|
||||
|
||||
// Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes)
|
||||
while (!q.empty()) {
|
||||
auto node_pair = q.front();
|
||||
q.pop();
|
||||
// Iterate through all the direct children of the first element in our BFS queue
|
||||
for (auto child : node_pair.first->children) {
|
||||
auto child_ops = child->Build();
|
||||
RETURN_UNEXPECTED_IF_NULL(child_ops);
|
||||
auto node_op = node_pair.second;
|
||||
// Iterate through all the DatasetOps returned by calling Build on the last Dataset object, associate them
|
||||
// with the execution tree and add the child and parent relationship between the nodes
|
||||
// Note that some Dataset objects might return more than one DatasetOps
|
||||
// e.g. MapDataset will return MapOp and ProjectOp if project_columns is set for MapDataset
|
||||
for (auto child_op : *child_ops) {
|
||||
RETURN_IF_NOT_OK(tree_->AssociateNode(child_op));
|
||||
RETURN_IF_NOT_OK(node_op->AddChild(child_op));
|
||||
node_op = child_op;
|
||||
}
|
||||
// Add the child and the last element of the returned DatasetOps (which is now the leaf node in our current
|
||||
// execution tree) to the BFS queue
|
||||
q.push(std::make_pair(child, child_ops->back()));
|
||||
}
|
||||
}
|
||||
RETURN_IF_NOT_OK(tree_->AssignRoot(root_op));
|
||||
}
|
||||
|
||||
// Launch the execution tree.
|
||||
RETURN_IF_NOT_OK(tree_->Prepare());
|
||||
RETURN_IF_NOT_OK(tree_->Launch());
|
||||
iterator_ = std::make_unique<DatasetIterator>(tree_);
|
||||
RETURN_UNEXPECTED_IF_NULL(iterator_);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -297,7 +297,7 @@ void bindTensor(py::module *m) {
|
|||
}))
|
||||
.def_buffer([](Tensor &tensor) {
|
||||
py::buffer_info info;
|
||||
THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
|
||||
THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info));
|
||||
return info;
|
||||
})
|
||||
.def("__str__", &Tensor::ToString)
|
||||
|
@ -311,7 +311,7 @@ void bindTensor(py::module *m) {
|
|||
return res;
|
||||
}
|
||||
py::buffer_info info;
|
||||
THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
|
||||
THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info));
|
||||
return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t);
|
||||
});
|
||||
|
||||
|
|
|
@ -0,0 +1,224 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dataset/include/samplers.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/sampler.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/random_sampler.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
|
||||
#include "dataset/engine/datasetops/source/sampler/pk_sampler.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace api {
|
||||
|
||||
// Default constructor for the SamplerObj base class; it performs no initialisation.
SamplerObj::SamplerObj() {}
|
||||
|
||||
/// Function to create a Distributed Sampler.
|
||||
std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle,
|
||||
int64_t num_samples, uint32_t seed) {
|
||||
auto sampler = std::make_shared<DistributedSamplerObj>(num_shards, shard_id, shuffle, num_samples, seed);
|
||||
// Input validation
|
||||
if (!sampler->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return sampler;
|
||||
}
|
||||
|
||||
/// Function to create a PK Sampler.
|
||||
std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) {
|
||||
auto sampler = std::make_shared<PKSamplerObj>(num_val, shuffle, num_samples);
|
||||
// Input validation
|
||||
if (!sampler->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return sampler;
|
||||
}
|
||||
|
||||
/// Function to create a Random Sampler.
|
||||
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement, int64_t num_samples) {
|
||||
auto sampler = std::make_shared<RandomSamplerObj>(replacement, num_samples);
|
||||
// Input validation
|
||||
if (!sampler->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return sampler;
|
||||
}
|
||||
|
||||
/// Function to create a Sequential Sampler.
|
||||
std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index, int64_t num_samples) {
|
||||
auto sampler = std::make_shared<SequentialSamplerObj>(start_index, num_samples);
|
||||
// Input validation
|
||||
if (!sampler->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return sampler;
|
||||
}
|
||||
|
||||
/// Function to create a Subset Random Sampler.
|
||||
std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices, int64_t num_samples) {
|
||||
auto sampler = std::make_shared<SubsetRandomSamplerObj>(indices, num_samples);
|
||||
// Input validation
|
||||
if (!sampler->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return sampler;
|
||||
}
|
||||
|
||||
/// Function to create a Weighted Random Sampler.
|
||||
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights, int64_t num_samples,
|
||||
bool replacement) {
|
||||
auto sampler = std::make_shared<WeightedRandomSamplerObj>(weights, num_samples, replacement);
|
||||
// Input validation
|
||||
if (!sampler->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return sampler;
|
||||
}
|
||||
|
||||
/* ####################################### Derived Sampler classes ################################# */
|
||||
|
||||
// DistributedSampler
|
||||
DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples,
|
||||
uint32_t seed)
|
||||
: num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {}
|
||||
|
||||
bool DistributedSamplerObj::ValidateParams() {
|
||||
if (num_shards_ <= 0) {
|
||||
MS_LOG(ERROR) << "DistributedSampler: invalid num_shards: " << num_shards_;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (shard_id_ < 0 || shard_id_ >= num_shards_) {
|
||||
MS_LOG(ERROR) << "DistributedSampler: invalid input, shard_id: " << shard_id_ << ", num_shards: " << num_shards_;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (num_samples_ < 0) {
|
||||
MS_LOG(ERROR) << "DistributedSampler: invalid num_samples: " << num_samples_;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<Sampler> DistributedSamplerObj::Build() {
|
||||
return std::make_shared<dataset::DistributedSampler>(num_samples_, num_shards_, shard_id_, shuffle_, seed_);
|
||||
}
|
||||
|
||||
// PKSampler
|
||||
PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples)
|
||||
: num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {}
|
||||
|
||||
bool PKSamplerObj::ValidateParams() {
|
||||
if (num_val_ <= 0) {
|
||||
MS_LOG(ERROR) << "PKSampler: invalid num_val: " << num_val_;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (num_samples_ < 0) {
|
||||
MS_LOG(ERROR) << "PKSampler: invalid num_samples: " << num_samples_;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<Sampler> PKSamplerObj::Build() {
|
||||
return std::make_shared<dataset::PKSampler>(num_samples_, num_val_, shuffle_);
|
||||
}
|
||||
|
||||
// RandomSampler
|
||||
RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples)
|
||||
: replacement_(replacement), num_samples_(num_samples) {}
|
||||
|
||||
bool RandomSamplerObj::ValidateParams() {
|
||||
if (num_samples_ < 0) {
|
||||
MS_LOG(ERROR) << "RandomSampler: invalid num_samples: " << num_samples_;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<Sampler> RandomSamplerObj::Build() {
|
||||
bool reshuffle_each_epoch = true;
|
||||
auto sampler = std::make_shared<dataset::RandomSampler>(num_samples_, replacement_, reshuffle_each_epoch);
|
||||
return sampler;
|
||||
}
|
||||
|
||||
// SequentialSampler
|
||||
SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples)
|
||||
: start_index_(start_index), num_samples_(num_samples) {}
|
||||
|
||||
bool SequentialSamplerObj::ValidateParams() {
|
||||
if (num_samples_ < 0) {
|
||||
MS_LOG(ERROR) << "SequentialSampler: invalid num_samples: " << num_samples_;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (start_index_ < 0) {
|
||||
MS_LOG(ERROR) << "SequentialSampler: invalid start_index: " << start_index_;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<Sampler> SequentialSamplerObj::Build() {
|
||||
auto sampler = std::make_shared<dataset::SequentialSampler>(num_samples_, start_index_);
|
||||
return sampler;
|
||||
}
|
||||
|
||||
// SubsetRandomSampler
|
||||
SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples)
|
||||
: indices_(indices), num_samples_(num_samples) {}
|
||||
|
||||
bool SubsetRandomSamplerObj::ValidateParams() {
|
||||
if (num_samples_ < 0) {
|
||||
MS_LOG(ERROR) << "SubsetRandomSampler: invalid num_samples: " << num_samples_;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<Sampler> SubsetRandomSamplerObj::Build() {
|
||||
auto sampler = std::make_shared<dataset::SubsetRandomSampler>(num_samples_, indices_);
|
||||
return sampler;
|
||||
}
|
||||
|
||||
// WeightedRandomSampler
|
||||
WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples,
|
||||
bool replacement)
|
||||
: weights_(weights), num_samples_(num_samples), replacement_(replacement) {}
|
||||
|
||||
bool WeightedRandomSamplerObj::ValidateParams() {
|
||||
if (num_samples_ < 0) {
|
||||
MS_LOG(ERROR) << "WeightedRandomSampler: invalid num_samples: " << num_samples_;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<Sampler> WeightedRandomSamplerObj::Build() {
|
||||
auto sampler = std::make_shared<dataset::WeightedRandomSampler>(num_samples_, weights_, replacement_);
|
||||
return sampler;
|
||||
}
|
||||
|
||||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,491 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dataset/include/transforms.h"
|
||||
#include "dataset/kernels/image/image_utils.h"
|
||||
#include "dataset/kernels/image/normalize_op.h"
|
||||
#include "dataset/kernels/image/decode_op.h"
|
||||
#include "dataset/kernels/image/resize_op.h"
|
||||
#include "dataset/kernels/image/random_crop_op.h"
|
||||
#include "dataset/kernels/image/center_crop_op.h"
|
||||
#include "dataset/kernels/image/uniform_aug_op.h"
|
||||
#include "dataset/kernels/image/random_horizontal_flip_op.h"
|
||||
#include "dataset/kernels/image/random_vertical_flip_op.h"
|
||||
#include "dataset/kernels/image/random_rotation_op.h"
|
||||
#include "dataset/kernels/image/cut_out_op.h"
|
||||
#include "dataset/kernels/image/random_color_adjust_op.h"
|
||||
#include "dataset/kernels/image/pad_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace api {
|
||||
|
||||
// The base class constructor has nothing to initialise.
TensorOperation::TensorOperation() = default;
|
||||
|
||||
// Transform operations for computer vision.
|
||||
namespace vision {
|
||||
|
||||
// Function to create NormalizeOperation.
|
||||
std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std) {
|
||||
auto op = std::make_shared<NormalizeOperation>(mean, std);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create DecodeOperation.
|
||||
std::shared_ptr<DecodeOperation> Decode(bool rgb) {
|
||||
auto op = std::make_shared<DecodeOperation>(rgb);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create ResizeOperation.
|
||||
std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size, InterpolationMode interpolation) {
|
||||
auto op = std::make_shared<ResizeOperation>(size, interpolation);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create RandomCropOperation.
|
||||
std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding,
|
||||
bool pad_if_needed, std::vector<uint8_t> fill_value) {
|
||||
auto op = std::make_shared<RandomCropOperation>(size, padding, pad_if_needed, fill_value);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create CenterCropOperation.
|
||||
std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size) {
|
||||
auto op = std::make_shared<CenterCropOperation>(size);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create UniformAugOperation.
|
||||
std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations,
|
||||
int32_t num_ops) {
|
||||
auto op = std::make_shared<UniformAugOperation>(operations, num_ops);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create RandomHorizontalFlipOperation.
|
||||
std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob) {
|
||||
auto op = std::make_shared<RandomHorizontalFlipOperation>(prob);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create RandomVerticalFlipOperation.
|
||||
std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob) {
|
||||
auto op = std::make_shared<RandomVerticalFlipOperation>(prob);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create RandomRotationOperation.
|
||||
std::shared_ptr<RandomRotationOperation> RandomRotation(std::vector<float> degrees, InterpolationMode resample,
|
||||
bool expand, std::vector<float> center,
|
||||
std::vector<uint8_t> fill_value) {
|
||||
auto op = std::make_shared<RandomRotationOperation>(degrees, resample, expand, center, fill_value);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create PadOperation.
|
||||
std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value,
|
||||
BorderType padding_mode) {
|
||||
auto op = std::make_shared<PadOperation>(padding, fill_value, padding_mode);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create CutOutOp.
|
||||
std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches) {
|
||||
auto op = std::make_shared<CutOutOperation>(length, num_patches);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
// Function to create RandomColorAdjustOperation.
|
||||
std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness,
|
||||
std::vector<float> contrast,
|
||||
std::vector<float> saturation, std::vector<float> hue) {
|
||||
auto op = std::make_shared<RandomColorAdjustOperation>(brightness, contrast, saturation, hue);
|
||||
// Input validation
|
||||
if (!op->ValidateParams()) {
|
||||
return nullptr;
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
/* ####################################### Derived TensorOperation classes ################################# */
|
||||
|
||||
// NormalizeOperation
|
||||
NormalizeOperation::NormalizeOperation(std::vector<float> mean, std::vector<float> std) : mean_(mean), std_(std) {}
|
||||
|
||||
bool NormalizeOperation::ValidateParams() {
|
||||
if (mean_.size() != 3) {
|
||||
MS_LOG(ERROR) << "Normalize: mean vector has incorrect size: " << mean_.size();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (std_.size() != 3) {
|
||||
MS_LOG(ERROR) << "Normalize: std vector has incorrect size: " << std_.size();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> NormalizeOperation::Build() {
|
||||
return std::make_shared<NormalizeOp>(mean_[0], mean_[1], mean_[2], std_[0], std_[1], std_[2]);
|
||||
}
|
||||
|
||||
// DecodeOperation
|
||||
DecodeOperation::DecodeOperation(bool rgb) : rgb_(rgb) {}
|
||||
|
||||
bool DecodeOperation::ValidateParams() { return true; }
|
||||
|
||||
std::shared_ptr<TensorOp> DecodeOperation::Build() { return std::make_shared<DecodeOp>(rgb_); }
|
||||
|
||||
// ResizeOperation
|
||||
ResizeOperation::ResizeOperation(std::vector<int32_t> size, InterpolationMode interpolation)
|
||||
: size_(size), interpolation_(interpolation) {}
|
||||
|
||||
bool ResizeOperation::ValidateParams() {
|
||||
if (size_.empty() || size_.size() > 2) {
|
||||
MS_LOG(ERROR) << "Resize: size vector has incorrect size: " << size_.size();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> ResizeOperation::Build() {
|
||||
int32_t height = size_[0];
|
||||
int32_t width = 0;
|
||||
|
||||
// User specified the width value.
|
||||
if (size_.size() == 2) {
|
||||
width = size_[1];
|
||||
}
|
||||
|
||||
return std::make_shared<ResizeOp>(height, width, interpolation_);
|
||||
}
|
||||
|
||||
// RandomCropOperation
|
||||
RandomCropOperation::RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding, bool pad_if_needed,
|
||||
std::vector<uint8_t> fill_value)
|
||||
: size_(size), padding_(padding), pad_if_needed_(pad_if_needed), fill_value_(fill_value) {}
|
||||
|
||||
bool RandomCropOperation::ValidateParams() {
|
||||
if (size_.empty() || size_.size() > 2) {
|
||||
MS_LOG(ERROR) << "RandomCrop: size vector has incorrect size: " << size_.size();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (padding_.empty() || padding_.size() != 4) {
|
||||
MS_LOG(ERROR) << "RandomCrop: padding vector has incorrect size: padding.size()";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fill_value_.empty() || fill_value_.size() != 3) {
|
||||
MS_LOG(ERROR) << "RandomCrop: fill_value vector has incorrect size: fill_value.size()";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> RandomCropOperation::Build() {
|
||||
int32_t crop_height = size_[0];
|
||||
int32_t crop_width = 0;
|
||||
|
||||
int32_t pad_top = padding_[0];
|
||||
int32_t pad_bottom = padding_[1];
|
||||
int32_t pad_left = padding_[2];
|
||||
int32_t pad_right = padding_[3];
|
||||
|
||||
uint8_t fill_r = fill_value_[0];
|
||||
uint8_t fill_g = fill_value_[1];
|
||||
uint8_t fill_b = fill_value_[2];
|
||||
|
||||
// User has specified the crop_width value.
|
||||
if (size_.size() == 2) {
|
||||
crop_width = size_[1];
|
||||
}
|
||||
|
||||
auto tensor_op = std::make_shared<RandomCropOp>(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right,
|
||||
BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// CenterCropOperation
|
||||
CenterCropOperation::CenterCropOperation(std::vector<int32_t> size) : size_(size) {}
|
||||
|
||||
bool CenterCropOperation::ValidateParams() {
|
||||
if (size_.empty() || size_.size() > 2) {
|
||||
MS_LOG(ERROR) << "CenterCrop: size vector has incorrect size.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> CenterCropOperation::Build() {
|
||||
int32_t crop_height = size_[0];
|
||||
int32_t crop_width = 0;
|
||||
|
||||
// User has specified crop_width.
|
||||
if (size_.size() == 2) {
|
||||
crop_width = size_[1];
|
||||
}
|
||||
|
||||
std::shared_ptr<CenterCropOp> tensor_op = std::make_shared<CenterCropOp>(crop_height, crop_width);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// UniformAugOperation
|
||||
UniformAugOperation::UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops)
|
||||
: operations_(operations), num_ops_(num_ops) {}
|
||||
|
||||
bool UniformAugOperation::ValidateParams() { return true; }
|
||||
|
||||
std::shared_ptr<TensorOp> UniformAugOperation::Build() {
|
||||
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
|
||||
(void)std::transform(operations_.begin(), operations_.end(), std::back_inserter(tensor_ops),
|
||||
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
|
||||
std::shared_ptr<UniformAugOp> tensor_op = std::make_shared<UniformAugOp>(tensor_ops, num_ops_);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// RandomHorizontalFlipOperation
|
||||
RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability) : probability_(probability) {}
|
||||
|
||||
bool RandomHorizontalFlipOperation::ValidateParams() { return true; }
|
||||
|
||||
std::shared_ptr<TensorOp> RandomHorizontalFlipOperation::Build() {
|
||||
std::shared_ptr<RandomHorizontalFlipOp> tensor_op = std::make_shared<RandomHorizontalFlipOp>(probability_);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// RandomVerticalFlipOperation
|
||||
RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability) : probability_(probability) {}
|
||||
|
||||
bool RandomVerticalFlipOperation::ValidateParams() { return true; }
|
||||
|
||||
std::shared_ptr<TensorOp> RandomVerticalFlipOperation::Build() {
|
||||
std::shared_ptr<RandomVerticalFlipOp> tensor_op = std::make_shared<RandomVerticalFlipOp>(probability_);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// Function to create RandomRotationOperation.
|
||||
RandomRotationOperation::RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode,
|
||||
bool expand, std::vector<float> center,
|
||||
std::vector<uint8_t> fill_value)
|
||||
: degrees_(degrees),
|
||||
interpolation_mode_(interpolation_mode),
|
||||
expand_(expand),
|
||||
center_(center),
|
||||
fill_value_(fill_value) {}
|
||||
|
||||
bool RandomRotationOperation::ValidateParams() {
|
||||
if (degrees_.empty() || degrees_.size() != 2) {
|
||||
MS_LOG(ERROR) << "RandomRotation: degrees vector has incorrect size: degrees.size()";
|
||||
return false;
|
||||
}
|
||||
if (center_.empty() || center_.size() != 2) {
|
||||
MS_LOG(ERROR) << "RandomRotation: center vector has incorrect size: center.size()";
|
||||
return false;
|
||||
}
|
||||
if (fill_value_.empty() || fill_value_.size() != 3) {
|
||||
MS_LOG(ERROR) << "RandomRotation: fill_value vector has incorrect size: fill_value.size()";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> RandomRotationOperation::Build() {
|
||||
std::shared_ptr<RandomRotationOp> tensor_op =
|
||||
std::make_shared<RandomRotationOp>(degrees_[0], degrees_[1], center_[0], center_[1], interpolation_mode_, expand_,
|
||||
fill_value_[0], fill_value_[1], fill_value_[2]);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// PadOperation
|
||||
PadOperation::PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value, BorderType padding_mode)
|
||||
: padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {}
|
||||
|
||||
bool PadOperation::ValidateParams() {
|
||||
if (padding_.empty() || padding_.size() == 3 || padding_.size() > 4) {
|
||||
MS_LOG(ERROR) << "Pad: padding vector has incorrect size: padding.size()";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fill_value_.empty() || (fill_value_.size() != 1 && fill_value_.size() != 3)) {
|
||||
MS_LOG(ERROR) << "Pad: fill_value vector has incorrect size: fill_value.size()";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> PadOperation::Build() {
|
||||
int32_t pad_top, pad_bottom, pad_left, pad_right;
|
||||
switch (padding_.size()) {
|
||||
case 1:
|
||||
pad_left = padding_[0];
|
||||
pad_top = padding_[0];
|
||||
pad_right = padding_[0];
|
||||
pad_bottom = padding_[0];
|
||||
break;
|
||||
case 2:
|
||||
pad_left = padding_[0];
|
||||
pad_top = padding_[1];
|
||||
pad_right = padding_[0];
|
||||
pad_bottom = padding_[1];
|
||||
break;
|
||||
default:
|
||||
pad_left = padding_[0];
|
||||
pad_top = padding_[1];
|
||||
pad_right = padding_[2];
|
||||
pad_bottom = padding_[3];
|
||||
}
|
||||
uint8_t fill_r, fill_g, fill_b;
|
||||
|
||||
fill_r = fill_value_[0];
|
||||
fill_g = fill_value_[0];
|
||||
fill_b = fill_value_[0];
|
||||
|
||||
if (fill_value_.size() == 3) {
|
||||
fill_r = fill_value_[0];
|
||||
fill_g = fill_value_[1];
|
||||
fill_b = fill_value_[2];
|
||||
}
|
||||
|
||||
std::shared_ptr<PadOp> tensor_op =
|
||||
std::make_shared<PadOp>(pad_top, pad_bottom, pad_left, pad_right, padding_mode_, fill_r, fill_g, fill_b);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// CutOutOperation
|
||||
CutOutOperation::CutOutOperation(int32_t length, int32_t num_patches) : length_(length), num_patches_(num_patches) {}
|
||||
|
||||
bool CutOutOperation::ValidateParams() {
|
||||
if (length_ < 0) {
|
||||
MS_LOG(ERROR) << "CutOut: length cannot be negative";
|
||||
return false;
|
||||
}
|
||||
if (num_patches_ < 0) {
|
||||
MS_LOG(ERROR) << "CutOut: number of patches cannot be negative";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> CutOutOperation::Build() {
|
||||
std::shared_ptr<CutOutOp> tensor_op = std::make_shared<CutOutOp>(length_, length_, num_patches_, false, 0, 0, 0);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// RandomColorAdjustOperation.
|
||||
RandomColorAdjustOperation::RandomColorAdjustOperation(std::vector<float> brightness, std::vector<float> contrast,
|
||||
std::vector<float> saturation, std::vector<float> hue)
|
||||
: brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {}
|
||||
|
||||
bool RandomColorAdjustOperation::ValidateParams() {
|
||||
// Do some input validation.
|
||||
if (brightness_.empty() || brightness_.size() > 2) {
|
||||
MS_LOG(ERROR) << "RandomColorAdjust: brightness must be a vector of one or two values";
|
||||
return false;
|
||||
}
|
||||
if (contrast_.empty() || contrast_.size() > 2) {
|
||||
MS_LOG(ERROR) << "RandomColorAdjust: contrast must be a vector of one or two values";
|
||||
return false;
|
||||
}
|
||||
if (saturation_.empty() || saturation_.size() > 2) {
|
||||
MS_LOG(ERROR) << "RandomColorAdjust: saturation must be a vector of one or two values";
|
||||
return false;
|
||||
}
|
||||
if (hue_.empty() || hue_.size() > 2) {
|
||||
MS_LOG(ERROR) << "RandomColorAdjust: hue must be a vector of one or two values";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> RandomColorAdjustOperation::Build() {
|
||||
float brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub;
|
||||
|
||||
brightness_lb = brightness_[0];
|
||||
brightness_ub = brightness_[0];
|
||||
|
||||
if (brightness_.size() == 2) brightness_ub = brightness_[1];
|
||||
|
||||
contrast_lb = contrast_[0];
|
||||
contrast_ub = contrast_[0];
|
||||
|
||||
if (contrast_.size() == 2) contrast_ub = contrast_[1];
|
||||
|
||||
saturation_lb = saturation_[0];
|
||||
saturation_ub = saturation_[0];
|
||||
|
||||
if (saturation_.size() == 2) saturation_ub = saturation_[1];
|
||||
|
||||
hue_lb = hue_[0];
|
||||
hue_ub = hue_[0];
|
||||
|
||||
if (hue_.size() == 2) hue_ub = hue_[1];
|
||||
|
||||
std::shared_ptr<RandomColorAdjustOp> tensor_op = std::make_shared<RandomColorAdjustOp>(
|
||||
brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
} // namespace vision
|
||||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -1,10 +1,6 @@
|
|||
ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto)
|
||||
ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto)
|
||||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
add_library(core OBJECT
|
||||
${EXAMPLE_SRCS}
|
||||
${FEATURE_SRCS}
|
||||
set(DATASET_CORE_SRC_FILES
|
||||
client.cc
|
||||
config_manager.cc
|
||||
cv_tensor.cc
|
||||
|
@ -13,6 +9,13 @@ add_library(core OBJECT
|
|||
tensor.cc
|
||||
tensor_row.cc
|
||||
tensor_shape.cc
|
||||
)
|
||||
)
|
||||
|
||||
ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto)
|
||||
ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto)
|
||||
add_library(core OBJECT ${DATASET_CORE_SRC_FILES} ${EXAMPLE_SRCS} ${FEATURE_SRCS})
|
||||
add_dependencies(core mindspore::protobuf)
|
||||
target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS})
|
||||
|
||||
if (ENABLE_PYTHON)
|
||||
target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
|
|
@ -25,21 +25,25 @@
|
|||
#include "dataset/core/tensor_shape.h"
|
||||
#include "dataset/engine/data_schema.h"
|
||||
#include "dataset/engine/dataset_iterator.h"
|
||||
#include "dataset/engine/datasetops/source/mindrecord_op.h"
|
||||
#include "dataset/engine/datasetops/source/tf_reader_op.h"
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "dataset/engine/datasetops/barrier_op.h"
|
||||
#include "dataset/engine/datasetops/batch_op.h"
|
||||
#include "dataset/engine/datasetops/filter_op.h"
|
||||
#include "dataset/engine/datasetops/source/generator_op.h"
|
||||
#include "dataset/engine/datasetops/build_vocab_op.h"
|
||||
#endif
|
||||
|
||||
#include "dataset/engine/datasetops/batch_op.h"
|
||||
#include "dataset/engine/datasetops/dataset_op.h"
|
||||
#include "dataset/engine/datasetops/device_queue_op.h"
|
||||
#include "dataset/engine/datasetops/map_op.h"
|
||||
#include "dataset/engine/datasetops/project_op.h"
|
||||
#include "dataset/engine/datasetops/rename_op.h"
|
||||
#include "dataset/engine/datasetops/filter_op.h"
|
||||
#include "dataset/engine/datasetops/repeat_op.h"
|
||||
#include "dataset/engine/datasetops/skip_op.h"
|
||||
#include "dataset/engine/datasetops/shuffle_op.h"
|
||||
#include "dataset/engine/datasetops/source/generator_op.h"
|
||||
#include "dataset/engine/datasetops/source/mindrecord_op.h"
|
||||
#include "dataset/engine/datasetops/source/tf_reader_op.h"
|
||||
#include "dataset/engine/datasetops/take_op.h"
|
||||
#include "dataset/engine/datasetops/zip_op.h"
|
||||
#include "dataset/engine/datasetops/concat_op.h"
|
||||
|
|
|
@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf };
|
|||
// Possible flavours of Tensor implementations
|
||||
enum class TensorImpl { kNone, kFlexible, kCv, kNP };
|
||||
|
||||
// Possible values for Border types
|
||||
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
|
||||
|
||||
// Possible interpolation modes
|
||||
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };
|
||||
|
||||
// convenience functions for 32bit int bitmask
|
||||
inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; }
|
||||
|
||||
|
|
|
@ -14,11 +14,12 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
#include "dataset/core/data_type.h"
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "dataset/core/pybind_support.h"
|
||||
#endif
|
||||
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
#include "dataset/core/pybind_support.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
|
@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const {
|
|||
return 0;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
py::dtype DataType::AsNumpyType() const {
|
||||
if (type_ < DataType::NUM_OF_TYPES)
|
||||
return py::dtype(kTypeInfo[type_].pybindType_);
|
||||
else
|
||||
return py::dtype("unknown");
|
||||
}
|
||||
#endif
|
||||
|
||||
uint8_t DataType::AsCVType() const {
|
||||
uint8_t res = kCVInvalidType;
|
||||
|
@ -112,6 +115,7 @@ std::string DataType::ToString() const {
|
|||
return "unknown";
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
DataType DataType::FromNpArray(const py::array &arr) {
|
||||
if (py::isinstance<py::array_t<bool>>(arr)) {
|
||||
return DataType(DataType::DE_BOOL);
|
||||
|
@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const {
|
|||
}
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -19,14 +19,16 @@
|
|||
#include <opencv2/core/hal/interface.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "pybind11/numpy.h"
|
||||
#include "pybind11/pybind11.h"
|
||||
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/core/pybind_support.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
#else
|
||||
#include "Eigen/Core"
|
||||
using float16 = Eigen::half;
|
||||
#endif
|
||||
#include "dataset/core/constants.h"
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
|
@ -59,6 +61,7 @@ class DataType {
|
|||
const uint8_t cvType_; // OpenCv matching type
|
||||
};
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
static inline const TypeInfo kTypeInfo[] = {
|
||||
// name, sizeInBytes, pybindType, formatDescriptor, openCV
|
||||
{"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN
|
||||
|
@ -76,19 +79,38 @@ class DataType {
|
|||
{"float64", 8, "double", py::format_descriptor<double>::format(), CV_64F}, // DE_FLOAT64
|
||||
{"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING
|
||||
};
|
||||
#else
|
||||
static inline const TypeInfo kTypeInfo[] = {
|
||||
// name, sizeInBytes, pybindType, formatDescriptor, openCV
|
||||
{"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN
|
||||
{"bool", 1, "bool", "", CV_8U}, // DE_BOOL
|
||||
{"int8", 1, "int8", "", CV_8S}, // DE_INT8
|
||||
{"uint8", 1, "uint8", "", CV_8U}, // DE_UINT8
|
||||
{"int16", 2, "int16", "", CV_16S}, // DE_INT16
|
||||
{"uint16", 2, "uint16", "", CV_16U}, // DE_UINT16
|
||||
{"int32", 4, "int32", "", CV_32S}, // DE_INT32
|
||||
{"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32
|
||||
{"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64
|
||||
{"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64
|
||||
{"float16", 2, "float16", "", CV_16F}, // DE_FLOAT16
|
||||
{"float32", 4, "float32", "", CV_32F}, // DE_FLOAT32
|
||||
{"float64", 8, "double", "", CV_64F}, // DE_FLOAT64
|
||||
{"string", 0, "bytes", "", kCVInvalidType} // DE_STRING
|
||||
};
|
||||
#endif
|
||||
|
||||
// No arg constructor to create an unknown shape
|
||||
DataType() : type_(DE_UNKNOWN) {}
|
||||
|
||||
// Create a type from a given string
|
||||
// @param type_str
|
||||
/// \param type_str
|
||||
explicit DataType(const std::string &type_str);
|
||||
|
||||
// Default destructor
|
||||
~DataType() = default;
|
||||
|
||||
// Create a type from a given enum
|
||||
// @param d
|
||||
/// \param d
|
||||
constexpr explicit DataType(Type d) : type_(d) {}
|
||||
|
||||
constexpr bool operator==(const DataType a) const { return type_ == a.type_; }
|
||||
|
@ -100,49 +122,49 @@ class DataType {
|
|||
constexpr bool operator!=(const Type a) const { return type_ != a; }
|
||||
|
||||
// Disable this usage `if(d)` where d is of type DataType
|
||||
// @return
|
||||
/// \return
|
||||
operator bool() = delete;
|
||||
|
||||
// To be used in Switch/case
|
||||
// @return
|
||||
/// \return
|
||||
operator Type() const { return type_; }
|
||||
|
||||
// The number of bytes needed to store one value of this type
|
||||
// @return
|
||||
/// \return
|
||||
uint8_t SizeInBytes() const;
|
||||
|
||||
// Convert from DataType to OpenCV type
|
||||
// @return
|
||||
/// \return
|
||||
uint8_t AsCVType() const;
|
||||
|
||||
// Convert from OpenCV type to DataType
|
||||
// @param cv_type
|
||||
// @return
|
||||
/// \param cv_type
|
||||
/// \return
|
||||
static DataType FromCVType(int cv_type);
|
||||
|
||||
// Returns a string representation of the type
|
||||
// @return
|
||||
/// \return
|
||||
std::string ToString() const;
|
||||
|
||||
// returns true if the template type is the same as the Tensor type_
|
||||
// @tparam T
|
||||
// @return true or false
|
||||
/// \tparam T
|
||||
/// \return true or false
|
||||
template <typename T>
|
||||
bool IsCompatible() const {
|
||||
return type_ == FromCType<T>();
|
||||
}
|
||||
|
||||
// returns true if the template type is the same as the Tensor type_
|
||||
// @tparam T
|
||||
// @return true or false
|
||||
/// \tparam T
|
||||
/// \return true or false
|
||||
template <typename T>
|
||||
bool IsLooselyCompatible() const;
|
||||
|
||||
// << Stream output operator overload
|
||||
// @notes This allows you to print the info using stream operators
|
||||
// @param out - reference to the output stream being overloaded
|
||||
// @param so - reference to the DataType to display
|
||||
// @return - the output stream must be returned
|
||||
/// \notes This allows you to print the info using stream operators
|
||||
/// \param out - reference to the output stream being overloaded
|
||||
/// \param so - reference to the DataType to display
|
||||
/// \return - the output stream must be returned
|
||||
friend std::ostream &operator<<(std::ostream &out, const DataType &so) {
|
||||
out << so.ToString();
|
||||
return out;
|
||||
|
@ -151,22 +173,24 @@ class DataType {
|
|||
template <typename T>
|
||||
static DataType FromCType();
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// Convert from DataType to Pybind type
|
||||
// @return
|
||||
/// \return
|
||||
py::dtype AsNumpyType() const;
|
||||
|
||||
// Convert from NP type to DataType
|
||||
// @param type
|
||||
// @return
|
||||
/// \param type
|
||||
/// \return
|
||||
static DataType FromNpType(const py::dtype &type);
|
||||
|
||||
// Convert from NP array to DataType
|
||||
// @param arr the NP array
|
||||
// @return
|
||||
/// \param arr the NP array
|
||||
/// \return
|
||||
static DataType FromNpArray(const py::array &arr);
|
||||
#endif
|
||||
|
||||
// Get the buffer string format of the current type. Used in pybind buffer protocol.
|
||||
// @return
|
||||
/// \return
|
||||
std::string GetPybindFormat() const;
|
||||
|
||||
bool IsSignedInt() const {
|
||||
|
|
|
@ -28,10 +28,12 @@
|
|||
#include "dataset/core/constants.h"
|
||||
#include "dataset/core/cv_tensor.h"
|
||||
#include "dataset/core/global_context.h"
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "dataset/core/pybind_support.h"
|
||||
namespace py = pybind11;
|
||||
#endif
|
||||
#include "dataset/core/tensor_shape.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
// Helper macros for printing tensor elements
|
||||
|
@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape
|
|||
MS_ASSERT(num_bytes == 0);
|
||||
if (shape.known()) Tensor::Reshape(shape);
|
||||
}
|
||||
|
||||
Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape)
|
||||
: Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) {
|
||||
// total bytes needed = offset array + strings
|
||||
|
@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape
|
|||
MS_ASSERT(num_bytes == 0);
|
||||
if (shape.known()) Tensor::Reshape(shape);
|
||||
}
|
||||
|
||||
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
|
||||
DataType type, const unsigned char *data) {
|
||||
if (!shape.known()) {
|
||||
|
@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl
|
|||
return Status::OK(); // returns base-class shared_ptr
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) {
|
||||
std::vector<dsize_t> shape;
|
||||
for (dsize_t i = 0; i < arr.ndim(); i++) {
|
||||
|
@ -297,6 +302,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
|
|||
|
||||
return Status::OK(); // returns base-class shared_ptr
|
||||
}
|
||||
#endif
|
||||
|
||||
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
|
||||
const TensorShape &shape) {
|
||||
|
@ -698,21 +704,24 @@ std::vector<dsize_t> Tensor::Strides() {
|
|||
return strides;
|
||||
}
|
||||
|
||||
Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
|
||||
RETURN_UNEXPECTED_IF_NULL(t);
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
|
||||
|
||||
std::string format_desc = t.type().GetPybindFormat();
|
||||
std::string format_desc = t->type().GetPybindFormat();
|
||||
if (format_desc.empty()) {
|
||||
RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format");
|
||||
}
|
||||
*out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */
|
||||
t.type().SizeInBytes(), /* Size of one scalar */
|
||||
format_desc, /* Python struct-style format descriptor */
|
||||
t.Rank(), /* Number of dimensions */
|
||||
t.shape().AsVector(), /* Buffer dimensions */
|
||||
t.Strides());
|
||||
*out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */
|
||||
t->type().SizeInBytes(), /* Size of one scalar */
|
||||
format_desc, /* Python struct-style format descriptor */
|
||||
t->Rank(), /* Number of dimensions */
|
||||
t->shape().AsVector(), /* Buffer dimensions */
|
||||
t->Strides());
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
|
||||
|
@ -752,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index)
|
|||
o->swap(sv);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// return data as numpy, should return status
|
||||
Status Tensor::GetDataAsNumpy(py::array *data) {
|
||||
RETURN_UNEXPECTED_IF_NULL(data_);
|
||||
|
@ -815,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
|
|||
data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
|
||||
|
||||
|
|
|
@ -26,20 +26,27 @@
|
|||
#undef HAVE_STDDEF_H
|
||||
#undef HAVE_STDLIB_H
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "pybind11/numpy.h"
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
#endif
|
||||
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/core/data_type.h"
|
||||
#include "dataset/core/tensor_shape.h"
|
||||
#include "dataset/util/allocator.h"
|
||||
#include "dataset/util/status.h"
|
||||
#include "proto/example.pb.h"
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
namespace py = pybind11;
|
||||
#endif
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
class Tensor;
|
||||
template <typename T>
|
||||
class Allocator;
|
||||
|
||||
using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
|
||||
using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors
|
||||
|
@ -114,16 +121,17 @@ class Tensor {
|
|||
static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type,
|
||||
const unsigned char *data = nullptr);
|
||||
|
||||
/// Create a copy of the input tensor
|
||||
/// \param out [out] output tensor to be generated
|
||||
/// \param in [in] original tensor to be copied
|
||||
/// \return Status
|
||||
// Create a copy of the input tensor
|
||||
// @param out [out] output tensor to be generated
|
||||
// @param in [in] original tensor to be copied
|
||||
// @return Status
|
||||
static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
|
||||
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
|
||||
*out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// A static factory method to create a Tensor from a given py::array.
|
||||
// @param ptr output argument to hold the created Tensor
|
||||
// @param arr py::array
|
||||
|
@ -132,6 +140,7 @@ class Tensor {
|
|||
|
||||
// Helper function to create a tensor from Numpy of strings
|
||||
static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
|
||||
#endif
|
||||
|
||||
// A static factory method to create a Tensor from a given list of strings.
|
||||
// @param ptr output argument to hold the created Tensor
|
||||
|
@ -170,6 +179,7 @@ class Tensor {
|
|||
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) {
|
||||
return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar());
|
||||
}
|
||||
|
||||
// Create tensor from protobuf bytelist with uint8 or int8 types
|
||||
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
|
||||
const TensorShape &shape, const DataType &type, dsize_t pad_size);
|
||||
|
@ -346,12 +356,12 @@ class Tensor {
|
|||
|
||||
virtual void Squeeze();
|
||||
|
||||
/// Calculates the strides of the Tensor
|
||||
/// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
|
||||
/// The strides will be {4,2,1}.
|
||||
/// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
|
||||
/// The strides will be {16,8,4}.
|
||||
/// @return vector of integers
|
||||
// Calculates the strides of the Tensor
|
||||
// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
|
||||
// The strides will be {4,2,1}.
|
||||
// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
|
||||
// The strides will be {16,8,4}.
|
||||
// @return vector of integers
|
||||
std::vector<dsize_t> Strides();
|
||||
|
||||
std::string ToString() {
|
||||
|
@ -376,6 +386,7 @@ class Tensor {
|
|||
// Slice string tensors
|
||||
Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// Constructs numpy array from input tensor
|
||||
// @param data this data is the location of python data
|
||||
// @return Status code
|
||||
|
@ -383,7 +394,8 @@ class Tensor {
|
|||
|
||||
Status GetDataAsNumpyStrings(py::array *data);
|
||||
|
||||
static Status GetBufferInfo(Tensor &t, py::buffer_info *out);
|
||||
static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
|
||||
#endif
|
||||
|
||||
// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
|
||||
Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);
|
||||
|
@ -570,7 +582,7 @@ class Tensor {
|
|||
|
||||
// Return a TensorIterator that points to the start of the Tensor.
|
||||
// It's the user responsibility to use the correct type that matches the Tensor type
|
||||
// @tparam T The type of values in the Tensor
|
||||
// @tparam T The type of values in the Tensor
|
||||
// @return TensorIterator
|
||||
template <typename T>
|
||||
TensorIterator<T> begin() {
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
|
||||
#include "dataset/core/tensor_row.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
|
|
|
@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape)
|
|||
known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape.
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
TensorShape::TensorShape(py::list l)
|
||||
: raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
|
||||
std::vector<dsize_t> list_c;
|
||||
|
@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l)
|
|||
}
|
||||
AddListToShape(list_c);
|
||||
}
|
||||
#endif
|
||||
|
||||
TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type)
|
||||
: raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
|
||||
|
@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const {
|
|||
return TensorShape(vec);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
py::list TensorShape::AsPyList() {
|
||||
py::list list;
|
||||
for (auto i : raw_shape_) {
|
||||
|
@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() {
|
|||
}
|
||||
return list;
|
||||
}
|
||||
#endif
|
||||
|
||||
TensorShape TensorShape::Squeeze() const {
|
||||
std::vector<dsize_t> new_shape;
|
||||
|
|
|
@ -24,13 +24,16 @@
|
|||
|
||||
#include <opencv2/core/mat.hpp>
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "pybind11/pybind11.h"
|
||||
namespace py = pybind11;
|
||||
#endif
|
||||
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/util/status.h"
|
||||
#include "dataset/core/global_context.h"
|
||||
#include "dataset/util/allocator.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
// Class that represents a shape of a Tensor. A shape can be:
|
||||
|
@ -43,7 +46,8 @@ namespace dataset {
|
|||
// -# one or more dim is unknown --> not empty vector --> <d1, d2, d2, d3, ...> where di is unknown\n
|
||||
// Example: <3,?> (the 1st dim is unknown)\n
|
||||
// <2,?,?,?> (all dims but the 0th dim are unknown)
|
||||
// TensorShape supports any dim > 0 and < 2^31-1
|
||||
|
||||
/// \brief TensorShape supports any dim > 0 and < 2^31-1
|
||||
class TensorShape {
|
||||
public:
|
||||
static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension
|
||||
|
@ -51,57 +55,59 @@ class TensorShape {
|
|||
// Force the compiler to not create a no-arg constructor
|
||||
TensorShape() = delete;
|
||||
|
||||
// Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
|
||||
// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown
|
||||
// @param list
|
||||
/// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
|
||||
/// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown
|
||||
/// \param[in] list
|
||||
explicit TensorShape(const std::initializer_list<dsize_t> &list);
|
||||
|
||||
// Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
|
||||
// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown
|
||||
// @param list
|
||||
/// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
|
||||
/// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown
|
||||
/// \param[in] list
|
||||
explicit TensorShape(const std::vector<dsize_t> &list);
|
||||
|
||||
// Copy constructor
|
||||
// @param shape
|
||||
/// \brief Copy constructor
|
||||
/// \param[in] shape
|
||||
TensorShape(const TensorShape &shape);
|
||||
|
||||
// construct a TensorShape via a python list
|
||||
// @param py::list l - a list object from python
|
||||
#ifdef ENABLE_PYTHON
|
||||
/// \brief construct a TensorShape via a python list
|
||||
/// \param[in] py::list l - a list object from python
|
||||
explicit TensorShape(py::list l);
|
||||
#endif
|
||||
|
||||
~TensorShape() = default;
|
||||
|
||||
// Create a scalar Shape (i.e., empty shape with known_ = true)
|
||||
// @return TensorShape
|
||||
/// \brief Create a scalar Shape (i.e., empty shape with known_ = true)
|
||||
/// \return TensorShape
|
||||
static TensorShape CreateScalar() { return TensorShape({}); }
|
||||
|
||||
// Create a shape with an unknown rank.
|
||||
// @return TensorShape
|
||||
/// \brief Create a shape with an unknown rank.
|
||||
/// \return TensorShape
|
||||
static TensorShape CreateUnknownRankShape();
|
||||
|
||||
// Create a shape with a known rank .
|
||||
// @return TensorShape
|
||||
/// \brief Create a shape with a known rank .
|
||||
/// \return TensorShape
|
||||
static TensorShape CreateUnknownShapeWithRank(dsize_t rank);
|
||||
|
||||
// Insert a new dim into a copy of the current shape.
|
||||
// @param dim to be added
|
||||
// @param axis the index where dim should be added
|
||||
// @return New modified shape
|
||||
/// \brief Insert a new dim into a copy of the current shape.
|
||||
/// \param[in] dim to be added
|
||||
/// \param[in] axis the index where dim should be added
|
||||
/// \return New modified shape
|
||||
TensorShape InsertDim(dsize_t axis, dsize_t dim) const;
|
||||
|
||||
// Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
|
||||
// @param dim
|
||||
// @return
|
||||
/// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
|
||||
/// \param[in] dim
|
||||
/// \return
|
||||
TensorShape PrependDim(dsize_t dim) const;
|
||||
|
||||
// Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
|
||||
// @param dim
|
||||
// @return
|
||||
/// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
|
||||
/// \param[in] dim
|
||||
/// \return
|
||||
TensorShape AppendDim(dsize_t dim) const;
|
||||
|
||||
// Create a shape based on OpenCV shape and type
|
||||
// @param cv_size
|
||||
// @param type int that represent the type in OpenCV, example CV_8U, CV_64S
|
||||
/// \brief Create a shape based on OpenCV shape and type
|
||||
/// \param[in] cv_size
|
||||
/// \param[in] type int that represent the type in OpenCV, example CV_8U, CV_64S
|
||||
TensorShape(cv::MatSize cv_size, uint32_t type);
|
||||
|
||||
dsize_t Size() const { return raw_shape_.size(); }
|
||||
|
@ -123,47 +129,50 @@ class TensorShape {
|
|||
return raw_shape_[index];
|
||||
}
|
||||
|
||||
// Return the Shape as a vector
|
||||
// @return
|
||||
/// \brief Return the Shape as a vector
|
||||
/// \return
|
||||
std::vector<dsize_t> AsVector() const;
|
||||
|
||||
// Returns the class info as a string
|
||||
// @return
|
||||
/// \brief Returns the class info as a string
|
||||
/// \return
|
||||
std::string ToString() const {
|
||||
std::stringstream ss;
|
||||
ss << *this;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Actual print function used by operator<<
|
||||
// @param out output string stream
|
||||
/// \brief Actual print function used by operator<<
|
||||
/// \param out output string stream
|
||||
void Print(std::ostream &out) const;
|
||||
|
||||
// << Stream output operator overload
|
||||
// @notes This allows you to print the info using stream operators
|
||||
// @param out - reference to the output stream being overloaded
|
||||
// @param so - reference to the TensorShape to display
|
||||
// @return - the output stream must be returned
|
||||
/// \brief << Stream output operator overload
|
||||
/// This allows you to print the info using stream operators
|
||||
/// \param[in] out - reference to the output stream being overloaded
|
||||
/// \param[in] so - reference to the TensorShape to display
|
||||
/// \return - the output stream must be returned
|
||||
friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) {
|
||||
so.Print(out);
|
||||
return out;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
py::list AsPyList();
|
||||
#endif
|
||||
|
||||
// Checks if the given index is a valid index for this tensor.
|
||||
// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
|
||||
// @param index
|
||||
// @return bool
|
||||
/// \brief Checks if the given index is a valid index for this tensor.
|
||||
/// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
|
||||
/// \param[in] index
|
||||
/// \return bool
|
||||
bool IsValidIndex(const std::vector<dsize_t> &index) const;
|
||||
|
||||
TensorShape Squeeze() const;
|
||||
|
||||
std::vector<dsize_t> Strides() const;
|
||||
|
||||
// Returns the location of the item assuming row major memory layout.
|
||||
// @param index
|
||||
// @return
|
||||
/// \brief Returns the location of the item assuming row major memory layout.
|
||||
/// \param[in] index
|
||||
/// \param[out] flat_index
|
||||
/// \return
|
||||
Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const;
|
||||
|
||||
private:
|
||||
|
@ -174,11 +183,11 @@ class TensorShape {
|
|||
// Vector to keep the strides of the shape. The size is rank+1
|
||||
std::vector<dsize_t, IntAlloc> strides_;
|
||||
|
||||
// Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape.
|
||||
// @tparam T list
|
||||
// @param list Iterable list
|
||||
// @return true if the shape is valid and no overflow would be generated when counting the number of elements.
|
||||
// False otherwise.
|
||||
/// \brief Internal utility function to iterate over a list,
|
||||
/// check if the dim is valid and then insert it into the shape.
|
||||
/// \param[in] list Iterable list
|
||||
/// \return true if the shape is valid and no overflow would be generated when counting the number of elements.
|
||||
/// False otherwise.
|
||||
template <typename T>
|
||||
void AddListToShape(const T &list);
|
||||
};
|
||||
|
|
|
@ -2,13 +2,12 @@ add_subdirectory(source)
|
|||
|
||||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
add_library(engine-datasetops OBJECT
|
||||
|
||||
set(DATASET_ENGINE_DATASETOPS_SRC_FILES
|
||||
dataset_op.cc
|
||||
parallel_op.cc
|
||||
pipeline_op.cc
|
||||
barrier_op.cc
|
||||
batch_op.cc
|
||||
bucket_batch_by_length_op.cc
|
||||
device_queue_op.cc
|
||||
map_op.cc
|
||||
project_op.cc
|
||||
|
@ -19,7 +18,17 @@ add_library(engine-datasetops OBJECT
|
|||
shuffle_op.cc
|
||||
zip_op.cc
|
||||
concat_op.cc
|
||||
filter_op.cc
|
||||
build_vocab_op.cc
|
||||
)
|
||||
|
||||
if (ENABLE_PYTHON)
|
||||
set(DATASET_ENGINE_DATASETOPS_SRC_FILES
|
||||
${DATASET_ENGINE_DATASETOPS_SRC_FILES}
|
||||
bucket_batch_by_length_op.cc
|
||||
barrier_op.cc
|
||||
filter_op.cc
|
||||
build_vocab_op.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library(engine-datasetops OBJECT ${DATASET_ENGINE_DATASETOPS_SRC_FILES})
|
||||
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
#include <iomanip>
|
||||
|
||||
#include "common/utils.h"
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "dataset/core/pybind_support.h"
|
||||
#endif
|
||||
#include "dataset/engine/data_buffer.h"
|
||||
#include "dataset/engine/db_connector.h"
|
||||
#include "dataset/engine/opt/pass.h"
|
||||
|
@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa
|
|||
|
||||
Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) {
|
||||
RETURN_IF_NOT_OK(SanityCheck());
|
||||
#ifdef ENABLE_PYTHON
|
||||
*ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
|
||||
builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_,
|
||||
builder_batch_map_func_, builder_pad_map_);
|
||||
#else
|
||||
*ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
|
||||
builder_num_workers_, builder_cols_to_map_, builder_pad_map_);
|
||||
#endif
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() {
|
|||
return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err));
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
|
||||
const std::vector<std::string> &cols_to_map, py::function batch_size_func, py::function batch_map_func,
|
||||
PadInfo pad_map)
|
||||
|
@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size,
|
|||
pad_info_(pad_map) {
|
||||
worker_queues_.Init(num_workers, op_queue_size);
|
||||
}
|
||||
#else
|
||||
BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
|
||||
const std::vector<std::string> &cols_to_map, PadInfo pad_map)
|
||||
: ParallelOp(num_workers, op_queue_size),
|
||||
start_batch_size_(batch_size),
|
||||
drop_(drop),
|
||||
pad_(pad),
|
||||
pyfunc_column_names_(cols_to_map),
|
||||
pad_info_(pad_map) {
|
||||
worker_queues_.Init(num_workers, op_queue_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
Status BatchOp::operator()() {
|
||||
Status rc = LaunchThreadsAndInitOp();
|
||||
|
@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
|
|||
Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair,
|
||||
std::unique_ptr<DataBuffer> *db) {
|
||||
RETURN_UNEXPECTED_IF_NULL(table_pair.first);
|
||||
if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc
|
||||
#ifdef ENABLE_PYTHON
|
||||
if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc
|
||||
#endif
|
||||
if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed
|
||||
(*db) = std::make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone);
|
||||
std::unique_ptr<TensorQTable> dest_table = std::make_unique<TensorQTable>();
|
||||
|
@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) {
|
||||
TensorBatchTable input_table;
|
||||
input_table.reserve(pyfunc_column_names_.size());
|
||||
|
@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
|
|||
}
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) {
|
||||
#ifdef ENABLE_PYTHON
|
||||
if (batch_size_func_ != nullptr) {
|
||||
RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info));
|
||||
} else {
|
||||
(*batch_size) = start_batch_size_;
|
||||
}
|
||||
#else
|
||||
(*batch_size) = start_batch_size_;
|
||||
#endif
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
|
||||
{
|
||||
// Acquire Python GIL
|
||||
|
@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou
|
|||
}
|
||||
return Status(StatusCode::kOK);
|
||||
}
|
||||
#endif
|
||||
|
||||
Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo &pad_info,
|
||||
const std::unordered_map<std::string, int32_t> &column_name_id_map) {
|
||||
|
|
|
@ -89,6 +89,7 @@ class BatchOp : public ParallelOp {
|
|||
return *this;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// set columns to perform map on
|
||||
// @param const std::vector<std::string> & cols_to_map - name of columns to perform map on
|
||||
// @return Builder & reference to builder class object
|
||||
|
@ -104,6 +105,7 @@ class BatchOp : public ParallelOp {
|
|||
builder_batch_size_func_ = batch_size_func;
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
// @param std::shared_ptr<BatchOp> *ptr pointer to shared_ptr, actual return arg
|
||||
// @return Status - The error code return
|
||||
|
@ -121,8 +123,10 @@ class BatchOp : public ParallelOp {
|
|||
int32_t builder_op_connector_size_;
|
||||
std::vector<std::string> builder_cols_to_map_;
|
||||
PadInfo builder_pad_map_;
|
||||
#ifdef ENABLE_PYTHON
|
||||
py::function builder_batch_size_func_;
|
||||
py::function builder_batch_map_func_;
|
||||
#endif
|
||||
};
|
||||
|
||||
enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 };
|
||||
|
@ -144,6 +148,7 @@ class BatchOp : public ParallelOp {
|
|||
const int64_t get_epoch_num() const { return epoch_num_; }
|
||||
};
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// BatchOp constructor
|
||||
// @param int32_t batch_size
|
||||
// @param bool drop
|
||||
|
@ -152,6 +157,10 @@ class BatchOp : public ParallelOp {
|
|||
// @param int32_t num_workers
|
||||
BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
|
||||
const std::vector<std::string> &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map);
|
||||
#else
|
||||
BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
|
||||
const std::vector<std::string> &, PadInfo pad_map);
|
||||
#endif
|
||||
|
||||
// BatchOp destructor
|
||||
~BatchOp() {}
|
||||
|
@ -219,10 +228,13 @@ class BatchOp : public ParallelOp {
|
|||
// @return Status - The error code return
|
||||
Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair,
|
||||
std::unique_ptr<DataBuffer> *db);
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// Function that calls pyfunc to perform map on batch
|
||||
// @param (std::pair<std::unique_ptr<TensorQTable>, batch_stats> *table_pair - contains un-batched tensor
|
||||
// @return Status - The error code return
|
||||
Status MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair);
|
||||
#endif
|
||||
|
||||
// @param const PadInfo &pad_info pad info to unpack
|
||||
// @param const std::unordered_map<std::string, int32_t>& column_name_id_map - column names to index mapping
|
||||
|
@ -247,6 +259,7 @@ class BatchOp : public ParallelOp {
|
|||
// @return Status - The error code return
|
||||
Status LaunchThreadsAndInitOp();
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// Invoke batch size function with current BatchInfo to generate batch size.
|
||||
// @return Status - The error code return
|
||||
Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info);
|
||||
|
@ -254,6 +267,7 @@ class BatchOp : public ParallelOp {
|
|||
// Invoke batch map function with current BatchInfo to generate tensors to batch.
|
||||
// @return Status - The error code return
|
||||
Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info);
|
||||
#endif
|
||||
|
||||
int32_t start_batch_size_;
|
||||
bool drop_; // bool for whether to drop remainder or not
|
||||
|
@ -262,8 +276,10 @@ class BatchOp : public ParallelOp {
|
|||
PadInfo pad_info_; // column names to perform padding on
|
||||
std::unique_ptr<ChildIterator> child_iterator_; // child iterator for fetching TensorRows 1 by 1
|
||||
QueueList<std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>> worker_queues_; // internal queue for syncing worker
|
||||
#ifdef ENABLE_PYTHON
|
||||
py::function batch_size_func_; // Function pointer of batch size function
|
||||
py::function batch_map_func_; // Function pointer of per batch map function
|
||||
#endif
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -1,19 +1,32 @@
|
|||
add_subdirectory(sampler)
|
||||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
add_library(engine-datasetops-source OBJECT
|
||||
generator_op.cc
|
||||
|
||||
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
|
||||
io_block.cc
|
||||
mindrecord_op.cc
|
||||
tf_reader_op.cc
|
||||
image_folder_op.cc
|
||||
mnist_op.cc
|
||||
voc_op.cc
|
||||
coco_op.cc
|
||||
manifest_op.cc
|
||||
cifar_op.cc
|
||||
random_data_op.cc
|
||||
celeba_op.cc
|
||||
text_file_op.cc
|
||||
clue_op.cc
|
||||
)
|
||||
|
||||
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
|
||||
${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}
|
||||
mindrecord_op.cc
|
||||
tf_reader_op.cc
|
||||
)
|
||||
|
||||
if (ENABLE_PYTHON)
|
||||
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
|
||||
${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}
|
||||
generator_op.cc
|
||||
voc_op.cc
|
||||
manifest_op.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library(engine-datasetops-source OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES})
|
|
@ -1,12 +1,21 @@
|
|||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
add_library(engine-datasetops-source-sampler OBJECT
|
||||
|
||||
set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
|
||||
distributed_sampler.cc
|
||||
pk_sampler.cc
|
||||
python_sampler.cc
|
||||
random_sampler.cc
|
||||
sampler.cc
|
||||
sequential_sampler.cc
|
||||
subset_random_sampler.cc
|
||||
weighted_random_sampler.cc
|
||||
)
|
||||
|
||||
if (ENABLE_PYTHON)
|
||||
set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
|
||||
${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}
|
||||
python_sampler.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library(engine-datasetops-source-sampler OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES})
|
||||
|
|
|
@ -89,6 +89,7 @@ void Sampler::Print(std::ostream &out, bool show_all) const {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status Sampler::GetAllIdsThenReset(py::array *data) {
|
||||
std::unique_ptr<DataBuffer> db;
|
||||
std::shared_ptr<Tensor> sample_ids;
|
||||
|
@ -120,6 +121,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) {
|
|||
RETURN_IF_NOT_OK(ResetSampler());
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
Status Sampler::SetNumSamples(int64_t num_samples) {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative");
|
||||
|
|
|
@ -74,8 +74,11 @@ class Sampler {
|
|||
// @return - The error code return
|
||||
virtual Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) = 0;
|
||||
|
||||
// This function is only called by the Python layer. It is not needed by Android.
|
||||
#ifdef ENABLE_PYTHON
|
||||
// return all ids in one epoch as a numpy array, then call reset
|
||||
Status GetAllIdsThenReset(py::array *data);
|
||||
#endif
|
||||
|
||||
// for next epoch of sampleIds
|
||||
// @return - The error code return
|
||||
|
@ -155,5 +158,4 @@ class Sampler {
|
|||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_
|
||||
|
|
|
@ -429,6 +429,7 @@ Status Graph::GetMetaInfo(MetaInfo *meta_info) {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status Graph::GraphInfo(py::dict *out) {
|
||||
MetaInfo meta_info;
|
||||
RETURN_IF_NOT_OK(GetMetaInfo(&meta_info));
|
||||
|
@ -440,6 +441,7 @@ Status Graph::GraphInfo(py::dict *out) {
|
|||
(*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type);
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
Status Graph::LoadNodeAndEdge() {
|
||||
GraphLoader gl(dataset_file_, num_workers_);
|
||||
|
|
|
@ -140,8 +140,10 @@ class Graph {
|
|||
// @return Status - The error code return
|
||||
Status GetMetaInfo(MetaInfo *meta_info);
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// Return meta information to python layer
|
||||
Status GraphInfo(py::dict *out);
|
||||
#endif
|
||||
|
||||
Status Init();
|
||||
|
||||
|
|
|
@ -21,13 +21,15 @@
|
|||
#include "dataset/engine/datasetops/map_op.h"
|
||||
#include "dataset/engine/datasetops/project_op.h"
|
||||
#include "dataset/engine/datasetops/rename_op.h"
|
||||
#include "dataset/engine/datasetops/filter_op.h"
|
||||
#include "dataset/engine/datasetops/repeat_op.h"
|
||||
#include "dataset/engine/datasetops/skip_op.h"
|
||||
#include "dataset/engine/datasetops/shuffle_op.h"
|
||||
#include "dataset/engine/datasetops/source/generator_op.h"
|
||||
#include "dataset/engine/datasetops/source/mindrecord_op.h"
|
||||
#include "dataset/engine/datasetops/source/tf_reader_op.h"
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "dataset/engine/datasetops/filter_op.h"
|
||||
#include "dataset/engine/datasetops/source/generator_op.h"
|
||||
#endif
|
||||
#include "dataset/engine/datasetops/source/image_folder_op.h"
|
||||
#include "dataset/engine/datasetops/take_op.h"
|
||||
#include "dataset/engine/datasetops/zip_op.h"
|
||||
|
@ -111,11 +113,6 @@ Status NodePass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) {
|
|||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
}
|
||||
|
||||
Status NodePass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) {
|
||||
// Fallback to base class visitor by default
|
||||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
}
|
||||
|
||||
Status NodePass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) {
|
||||
// Fallback to base class visitor by default
|
||||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
|
@ -126,11 +123,6 @@ Status NodePass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) {
|
|||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
}
|
||||
|
||||
Status NodePass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) {
|
||||
// Fallback to base class visitor by default
|
||||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
}
|
||||
|
||||
Status NodePass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) {
|
||||
// Fallback to base class visitor by default
|
||||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
|
@ -141,6 +133,18 @@ Status NodePass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) {
|
|||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status NodePass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) {
|
||||
// Fallback to base class visitor by default
|
||||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
}
|
||||
|
||||
Status NodePass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) {
|
||||
// Fallback to base class visitor by default
|
||||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
}
|
||||
#endif
|
||||
|
||||
Status NodePass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) {
|
||||
// Fallback to base class visitor by default
|
||||
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
|
||||
|
|
|
@ -33,18 +33,20 @@ class ProjectOp;
|
|||
|
||||
class RenameOp;
|
||||
|
||||
class FilterOp;
|
||||
|
||||
class SkipOp;
|
||||
|
||||
class ShuffleOp;
|
||||
|
||||
class GeneratorOp;
|
||||
|
||||
class MindRecordOp;
|
||||
|
||||
class TFReaderOp;
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
class FilterOp;
|
||||
|
||||
class GeneratorOp;
|
||||
#endif
|
||||
|
||||
class TakeOp;
|
||||
|
||||
class ZipOp;
|
||||
|
@ -122,18 +124,20 @@ class NodePass : public Pass {
|
|||
|
||||
virtual Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified);
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified);
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified);
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified);
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified);
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified);
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified);
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
virtual Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified);
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified);
|
||||
#endif
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified);
|
||||
|
||||
virtual Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified);
|
||||
|
|
|
@ -50,12 +50,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PrinterPass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) {
|
||||
*modified = false;
|
||||
std::cout << "Visiting FilterOp" << '\n';
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PrinterPass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) {
|
||||
*modified = false;
|
||||
std::cout << "Visiting SkipOp" << '\n';
|
||||
|
@ -67,11 +61,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PrinterPass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) {
|
||||
*modified = false;
|
||||
std::cout << "Visiting GeneratorOp" << '\n';
|
||||
return Status::OK();
|
||||
}
|
||||
Status PrinterPass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) {
|
||||
*modified = false;
|
||||
std::cout << "Visiting MindRecordOp" << '\n';
|
||||
|
@ -84,6 +73,20 @@ Status PrinterPass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified)
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status PrinterPass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) {
|
||||
*modified = false;
|
||||
std::cout << "Visiting FilterOp" << '\n';
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PrinterPass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) {
|
||||
*modified = false;
|
||||
std::cout << "Visiting GeneratorOp" << '\n';
|
||||
return Status::OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
Status PrinterPass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) {
|
||||
*modified = false;
|
||||
std::cout << "Visiting TakeOp" << '\n';
|
||||
|
|
|
@ -35,18 +35,20 @@ class PrinterPass : public NodePass {
|
|||
|
||||
Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) override;
|
||||
|
||||
Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) override;
|
||||
|
||||
Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) override;
|
||||
|
||||
Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) override;
|
||||
|
||||
Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) override;
|
||||
|
||||
Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) override;
|
||||
|
||||
Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) override;
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) override;
|
||||
|
||||
Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) override;
|
||||
#endif
|
||||
|
||||
Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) override;
|
||||
|
||||
Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified) override;
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
../../../core/constants.h
|
|
@ -0,0 +1 @@
|
|||
../../../core/data_type.h
|
|
@ -0,0 +1 @@
|
|||
../../../core/tensor_shape.h
|
|
@ -0,0 +1 @@
|
|||
../../../util/status.h
|
|
@ -0,0 +1,357 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATASET_INCLUDE_DATASETS_H_
|
||||
#define DATASET_INCLUDE_DATASETS_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include "dataset/include/tensor.h"
|
||||
#include "dataset/include/iterator.h"
|
||||
#include "dataset/include/samplers.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Forward declare
|
||||
class DatasetOp;
|
||||
class DataSchema;
|
||||
class Tensor;
|
||||
class TensorShape;
|
||||
|
||||
namespace api {
|
||||
|
||||
class TensorOperation;
|
||||
class SamplerObj;
|
||||
class ImageFolderDataset;
|
||||
class MnistDataset;
|
||||
class BatchDataset;
|
||||
class RepeatDataset;
|
||||
class MapDataset;
|
||||
class ShuffleDataset;
|
||||
class Cifar10Dataset;
|
||||
class ProjectDataset;
|
||||
|
||||
/// \brief Function to create an ImageFolderDataset
|
||||
/// \notes A source dataset that reads images from a tree of directories
|
||||
/// All images within one folder have the same label
|
||||
/// The generated dataset has two columns ['image', 'label']
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset
|
||||
/// \param[in] decode A flag to decode in ImageFolder
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
|
||||
/// A `RandomSampler` will be used to randomly iterate the entire dataset
|
||||
/// \param[in] extensions File extensions to be read
|
||||
/// \param[in] class_indexing a class name to label map
|
||||
/// \return Shared pointer to the current ImageFolderDataset
|
||||
std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool decode = false,
|
||||
std::shared_ptr<SamplerObj> sampler = nullptr,
|
||||
std::set<std::string> extensions = {},
|
||||
std::map<std::string, int32_t> class_indexing = {});
|
||||
|
||||
/// \brief Function to create a MnistDataset
|
||||
/// \notes The generated dataset has two columns ['image', 'label']
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
|
||||
/// A `RandomSampler` will be used to randomly iterate the entire dataset
|
||||
/// \return Shared pointer to the current MnistDataset
|
||||
std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler = nullptr);
|
||||
|
||||
/// \brief Function to create a Cifar10 Dataset
|
||||
/// \notes The generated dataset has two columns ['image', 'label']
|
||||
/// \param[in] dataset_dir Path to the root directory that contains the dataset
|
||||
/// \param[in] num_samples The number of images to be included in the dataset
|
||||
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler`
|
||||
/// will be used to randomly iterate the entire dataset
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples,
|
||||
std::shared_ptr<SamplerObj> sampler);
|
||||
|
||||
/// \class Dataset datasets.h
|
||||
/// \brief A base class to represent a dataset in the data pipeline.
|
||||
class Dataset : public std::enable_shared_from_this<Dataset> {
|
||||
public:
|
||||
friend class Iterator;
|
||||
|
||||
/// \brief Constructor
|
||||
Dataset();
|
||||
|
||||
/// \brief Destructor
|
||||
~Dataset() = default;
|
||||
|
||||
/// \brief Pure virtual function to convert a Dataset class into a runtime dataset object
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
virtual std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() = 0;
|
||||
|
||||
/// \brief Pure virtual function for derived class to implement parameters validation
|
||||
/// \return bool True if all the params are valid
|
||||
virtual bool ValidateParams() = 0;
|
||||
|
||||
/// \brief Setter function for runtime number of workers
|
||||
/// \param[in] num_workers The number of threads in this operator
|
||||
/// \return Shared pointer to the original object
|
||||
std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers) {
|
||||
num_workers_ = num_workers;
|
||||
return shared_from_this();
|
||||
}
|
||||
|
||||
/// \brief Function to create an Iterator over the Dataset pipeline
|
||||
/// \return Shared pointer to the Iterator
|
||||
std::shared_ptr<Iterator> CreateIterator();
|
||||
|
||||
/// \brief Function to create a BatchDataset
|
||||
/// \notes Combines batch_size number of consecutive rows into batches
|
||||
/// \param[in] batch_size Path to the root directory that contains the dataset
|
||||
/// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
|
||||
/// batch. If true, and if there are less than batch_size rows
|
||||
/// available to make the last batch, then those rows will
|
||||
/// be dropped and not propagated to the next node
|
||||
/// \return Shared pointer to the current BatchDataset
|
||||
std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);
|
||||
|
||||
/// \brief Function to create a RepeatDataset
|
||||
/// \notes Repeats this dataset count times. Repeat indefinitely if count is -1
|
||||
/// \param[in] count Number of times the dataset should be repeated
|
||||
/// \return Shared pointer to the current Dataset
|
||||
/// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset`
|
||||
/// due to a limitation in the current implementation
|
||||
std::shared_ptr<Dataset> Repeat(int32_t count = -1);
|
||||
|
||||
/// \brief Function to create a MapDataset
|
||||
/// \notes Applies each operation in operations to this dataset
|
||||
/// \param[in] operations Vector of operations to be applied on the dataset. Operations are
|
||||
/// applied in the order they appear in this list
|
||||
/// \param[in] input_columns Vector of the names of the columns that will be passed to the first
|
||||
/// operation as input. The size of this list must match the number of
|
||||
/// input columns expected by the first operator. The default input_columns
|
||||
/// is the first column
|
||||
/// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation
|
||||
/// This parameter is mandatory if len(input_columns) != len(output_columns)
|
||||
/// The size of this list must match the number of output columns of the
|
||||
/// last operation. The default output_columns will have the same
|
||||
/// name as the input columns, i.e., the columns will be replaced
|
||||
/// \param[in] project_columns A list of column names to project
|
||||
/// \return Shared pointer to the current MapDataset
|
||||
std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations,
|
||||
std::vector<std::string> input_columns = {},
|
||||
std::vector<std::string> output_columns = {},
|
||||
const std::vector<std::string> &project_columns = {});
|
||||
|
||||
/// \brief Function to create a Shuffle Dataset
|
||||
/// \notes Randomly shuffles the rows of this dataset
|
||||
/// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling
|
||||
/// \return Shared pointer to the current ShuffleDataset
|
||||
std::shared_ptr<ShuffleDataset> Shuffle(int32_t shuffle_size);
|
||||
|
||||
/// \brief Function to create a Project Dataset
|
||||
/// \notes Applies project to the dataset
|
||||
/// \param[in] columns The name of columns to project
|
||||
/// \return Shared pointer to the current Dataset
|
||||
std::shared_ptr<ProjectDataset> Project(const std::vector<std::string> &columns);
|
||||
|
||||
protected:
|
||||
std::vector<std::shared_ptr<Dataset>> children;
|
||||
std::shared_ptr<Dataset> parent;
|
||||
|
||||
int32_t num_workers_;
|
||||
int32_t rows_per_buffer_;
|
||||
int32_t connector_que_size_;
|
||||
};
|
||||
|
||||
/* ####################################### Derived Dataset classes ################################# */
|
||||
|
||||
/// \class ImageFolderDataset
|
||||
/// \brief A Dataset derived class to represent ImageFolder dataset
|
||||
class ImageFolderDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
|
||||
std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing);
|
||||
|
||||
/// \brief Destructor
|
||||
~ImageFolderDataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
|
||||
/// \return bool true if all the params are valid
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::string dataset_dir_;
|
||||
bool decode_;
|
||||
bool recursive_;
|
||||
std::shared_ptr<SamplerObj> sampler_;
|
||||
std::map<std::string, int32_t> class_indexing_;
|
||||
std::set<std::string> exts_;
|
||||
};
|
||||
|
||||
class MnistDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler);
|
||||
|
||||
/// \brief Destructor
|
||||
~MnistDataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
|
||||
/// \return bool true if all the params are valid
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::string dataset_dir_;
|
||||
std::shared_ptr<SamplerObj> sampler_;
|
||||
};
|
||||
|
||||
class BatchDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map,
|
||||
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map);
|
||||
|
||||
/// \brief Destructor
|
||||
~BatchDataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
|
||||
/// \return bool true if all the params are valid
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
int32_t batch_size_;
|
||||
bool drop_remainder_;
|
||||
bool pad_;
|
||||
std::vector<std::string> cols_to_map_;
|
||||
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_;
|
||||
};
|
||||
|
||||
class RepeatDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
explicit RepeatDataset(uint32_t count);
|
||||
|
||||
/// \brief Destructor
|
||||
~RepeatDataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
|
||||
/// \return bool true if all the params are valid
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
uint32_t repeat_count_;
|
||||
};
|
||||
|
||||
/// \class ShuffleDataset
/// \brief A Dataset derived class to represent a Shuffle dataset (the type returned by Dataset::Shuffle)
class ShuffleDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
/// \param[in] shuffle_size Stored in shuffle_size_; Dataset::Shuffle documents it as the size of the
/// shuffle buffer (must be larger than 1)
/// \param[in] reset_every_epoch Stored in reset_every_epoch_; NOTE(review): exact runtime effect is not
/// visible in this header - confirm against the implementation
ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch);
|
||||
|
||||
/// \brief Destructor
~ShuffleDataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
int32_t shuffle_size_;
|
||||
// NOTE(review): no constructor parameter supplies the seed; presumably it is chosen by the
// implementation - confirm it is always initialised before Build() reads it.
uint32_t shuffle_seed_;
|
||||
bool reset_every_epoch_;
|
||||
};
|
||||
|
||||
class MapDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns = {},
|
||||
std::vector<std::string> output_columns = {}, const std::vector<std::string> &columns = {});
|
||||
|
||||
/// \brief Destructor
|
||||
~MapDataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
|
||||
/// \return bool true if all the params are valid
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<TensorOperation>> operations_;
|
||||
std::vector<std::string> input_columns_;
|
||||
std::vector<std::string> output_columns_;
|
||||
std::vector<std::string> project_columns_;
|
||||
};
|
||||
|
||||
class Cifar10Dataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler);
|
||||
|
||||
/// \brief Destructor
|
||||
~Cifar10Dataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
|
||||
/// \return bool true if all the params are valid
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::string dataset_dir_;
|
||||
int32_t num_samples_;
|
||||
std::shared_ptr<SamplerObj> sampler_;
|
||||
};
|
||||
|
||||
class ProjectDataset : public Dataset {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
explicit ProjectDataset(const std::vector<std::string> &columns);
|
||||
|
||||
/// \brief Destructor
|
||||
~ProjectDataset() = default;
|
||||
|
||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||
/// \return shared pointer to the list of newly created DatasetOps
|
||||
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
|
||||
|
||||
/// \brief Parameters validation
|
||||
/// \return bool true if all the params are valid
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<std::string> columns_;
|
||||
};
|
||||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // DATASET_INCLUDE_DATASETS_H_
|
|
@ -0,0 +1,115 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATASET_INCLUDE_ITERATOR_H_
|
||||
#define DATASET_INCLUDE_ITERATOR_H_
|
||||
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "dataset/include/status.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Forward declare
|
||||
class ExecutionTree;
|
||||
class DatasetIterator;
|
||||
class DatasetOp;
|
||||
class Tensor;
|
||||
|
||||
namespace api {
|
||||
|
||||
class Dataset;
|
||||
|
||||
using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>;
|
||||
|
||||
// Abstract class for iterating over the dataset.
|
||||
class Iterator {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
Iterator() = default;
|
||||
|
||||
/// \brief Destructor
|
||||
~Iterator() = default;
|
||||
|
||||
/// \brief Method for building and launching the pipeline.
|
||||
/// \param[in] ops - a vector of DatasetOp in the data pipeline.
|
||||
/// \return - a Status error code, returns OK if no error encountered.
|
||||
Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds);
|
||||
|
||||
/// \brief Function to get the next row from the data pipeline.
|
||||
/// \param[out] row - the output tensor row.
|
||||
void GetNextRow(TensorMap *row);
|
||||
|
||||
/// \brief Function to shut down the data pipeline.
|
||||
void Stop();
|
||||
|
||||
class _Iterator {
|
||||
public:
|
||||
explicit _Iterator(Iterator *lt) : lt_{lt}, cur_row_{nullptr} {
|
||||
if (lt_) {
|
||||
cur_row_ = new TensorMap();
|
||||
lt_->GetNextRow(cur_row_);
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor
|
||||
~_Iterator() {
|
||||
if (cur_row_) {
|
||||
delete cur_row_;
|
||||
}
|
||||
}
|
||||
|
||||
_Iterator &operator++() {
|
||||
if (lt_) {
|
||||
++ind_;
|
||||
lt_->GetNextRow(cur_row_);
|
||||
}
|
||||
if (cur_row_ && cur_row_->size() == 0) {
|
||||
delete cur_row_;
|
||||
cur_row_ = nullptr;
|
||||
}
|
||||
return *this;
|
||||
} // prefix ++ overload
|
||||
TensorMap &operator*() { return *cur_row_; } // dereference operator
|
||||
TensorMap *operator->() { return cur_row_; }
|
||||
|
||||
bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; }
|
||||
|
||||
private:
|
||||
int ind_; // the cur node our Iterator points to
|
||||
Iterator *lt_;
|
||||
TensorMap *cur_row_;
|
||||
};
|
||||
|
||||
_Iterator begin() { return _Iterator(this); }
|
||||
|
||||
_Iterator end() { return _Iterator(nullptr); }
|
||||
|
||||
private:
|
||||
// Runtime tree.
|
||||
// Use shared_ptr instead of unique_ptr because the DatasetIterator constructor takes in a shared_ptr type.
|
||||
std::shared_ptr<ExecutionTree> tree_;
|
||||
|
||||
// Runtime iterator
|
||||
std::unique_ptr<DatasetIterator> iterator_;
|
||||
};
|
||||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // DATASET_INCLUDE_ITERATOR_H_
|
|
@ -0,0 +1,199 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATASET_API_SAMPLERS_H_
|
||||
#define DATASET_API_SAMPLERS_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Internal Sampler class forward declaration
|
||||
class Sampler;
|
||||
|
||||
namespace api {
|
||||
|
||||
class SamplerObj : public std::enable_shared_from_this<SamplerObj> {
|
||||
public:
|
||||
SamplerObj();
|
||||
|
||||
~SamplerObj() = default;
|
||||
|
||||
virtual std::shared_ptr<Sampler> Build() = 0;
|
||||
virtual bool ValidateParams() = 0;
|
||||
};
|
||||
|
||||
class DistributedSamplerObj;
|
||||
class PKSamplerObj;
|
||||
class RandomSamplerObj;
|
||||
class SequentialSamplerObj;
|
||||
class SubsetRandomSamplerObj;
|
||||
class WeightedRandomSamplerObj;
|
||||
|
||||
/// Function to create a Distributed Sampler.
|
||||
/// \notes A Sampler that access a shard of the dataset.
|
||||
/// \param[in] num_shards - Number of shards to divide the dataset into.
|
||||
/// \param[in] shard_id - Shard ID of the current shard within num_shards.
|
||||
/// \param[in] shuffle - If true, the indices are shuffled.
|
||||
/// \param[in] num_samples - The number of samples to draw (default to all elements).
|
||||
/// \param[in] seed - The seed in use when shuffle is true.
|
||||
/// \return Shared pointer to the current Sampler.
|
||||
std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true,
|
||||
int64_t num_samples = 0, uint32_t seed = 1);
|
||||
|
||||
/// Function to create a PK Sampler.
|
||||
/// \notes Samples K elements for each P class in the dataset.
|
||||
/// This will sample all classes.
|
||||
/// \param[in] num_val - Number of elements to sample for each class.
|
||||
/// \param[in] shuffle - If true, the class IDs are shuffled.
|
||||
/// \param[in] num_samples - The number of samples to draw (default to all elements).
|
||||
/// \return Shared pointer to the current Sampler.
|
||||
std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0);
|
||||
|
||||
/// Function to create a Random Sampler.
|
||||
/// \notes Samples the elements randomly.
|
||||
/// \param[in] replacement - If True, put the sample ID back for the next draw.
|
||||
/// \param[in] num_samples - The number of samples to draw (default to all elements).
|
||||
/// \return Shared pointer to the current Sampler.
|
||||
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement = false, int64_t num_samples = 0);
|
||||
|
||||
/// Function to create a Sequential Sampler.
|
||||
/// \notes Samples the dataset elements sequentially, same as not having a sampler.
|
||||
/// \param[in] start_index - Index to start sampling at (default to start at first id).
|
||||
/// \param[in] num_samples - The number of samples to draw (default to all elements).
|
||||
/// \return Shared pointer to the current Sampler.
|
||||
std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0);
|
||||
|
||||
/// Function to create a Subset Random Sampler.
|
||||
/// \notes Samples the elements randomly from a sequence of indices.
|
||||
/// \param[in] indices - A vector sequence of indices.
|
||||
/// \param[in] num_samples - The number of samples to draw (default to all elements).
|
||||
/// \return Shared pointer to the current Sampler.
|
||||
std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices,
|
||||
int64_t num_samples = 0);
|
||||
|
||||
/// Function to create a Weighted Random Sampler.
|
||||
/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given
|
||||
/// weights (probabilities).
|
||||
/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1.
|
||||
/// \param[in] num_samples - The number of samples to draw (default to all elements).
|
||||
/// \param[in] replacement - If True, put the sample ID back for the next draw.
|
||||
/// \return Shared pointer to the current Sampler.
|
||||
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights,
|
||||
int64_t num_samples = 0, bool replacement = true);
|
||||
|
||||
/* ####################################### Derived Sampler classes ################################# */
|
||||
class DistributedSamplerObj : public SamplerObj {
|
||||
public:
|
||||
DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed);
|
||||
|
||||
~DistributedSamplerObj() = default;
|
||||
|
||||
std::shared_ptr<Sampler> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
int64_t num_shards_;
|
||||
int64_t shard_id_;
|
||||
bool shuffle_;
|
||||
int64_t num_samples_;
|
||||
uint32_t seed_;
|
||||
};
|
||||
|
||||
class PKSamplerObj : public SamplerObj {
|
||||
public:
|
||||
PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples);
|
||||
|
||||
~PKSamplerObj() = default;
|
||||
|
||||
std::shared_ptr<Sampler> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
int64_t num_val_;
|
||||
bool shuffle_;
|
||||
int64_t num_samples_;
|
||||
};
|
||||
|
||||
class RandomSamplerObj : public SamplerObj {
|
||||
public:
|
||||
RandomSamplerObj(bool replacement, int64_t num_samples);
|
||||
|
||||
~RandomSamplerObj() = default;
|
||||
|
||||
std::shared_ptr<Sampler> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
bool replacement_;
|
||||
int64_t num_samples_;
|
||||
};
|
||||
|
||||
class SequentialSamplerObj : public SamplerObj {
|
||||
public:
|
||||
SequentialSamplerObj(int64_t start_index, int64_t num_samples);
|
||||
|
||||
~SequentialSamplerObj() = default;
|
||||
|
||||
std::shared_ptr<Sampler> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
int64_t start_index_;
|
||||
int64_t num_samples_;
|
||||
};
|
||||
|
||||
class SubsetRandomSamplerObj : public SamplerObj {
|
||||
public:
|
||||
SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples);
|
||||
|
||||
~SubsetRandomSamplerObj() = default;
|
||||
|
||||
std::shared_ptr<Sampler> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
const std::vector<int64_t> &indices_;
|
||||
int64_t num_samples_;
|
||||
};
|
||||
|
||||
class WeightedRandomSamplerObj : public SamplerObj {
|
||||
public:
|
||||
explicit WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples = 0,
|
||||
bool replacement = true);
|
||||
|
||||
~WeightedRandomSamplerObj() = default;
|
||||
|
||||
std::shared_ptr<Sampler> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
const std::vector<double> &weights_;
|
||||
int64_t num_samples_;
|
||||
bool replacement_;
|
||||
};
|
||||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // DATASET_API_SAMPLERS_H_
|
|
@ -0,0 +1 @@
|
|||
../util/status.h
|
|
@ -0,0 +1 @@
|
|||
../core/tensor.h
|
|
@ -0,0 +1,380 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef DATASET_API_TRANSFORMS_H_
|
||||
#define DATASET_API_TRANSFORMS_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "dataset/core/constants.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
class TensorOp;
|
||||
|
||||
namespace api {
|
||||
// Abstract class to represent a dataset in the data pipeline.
|
||||
class TensorOperation : public std::enable_shared_from_this<TensorOperation> {
|
||||
public:
|
||||
/// \brief Constructor
|
||||
TensorOperation();
|
||||
|
||||
/// \brief Destructor
|
||||
~TensorOperation() = default;
|
||||
|
||||
/// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object.
|
||||
/// \return shared pointer to the newly created TensorOp.
|
||||
virtual std::shared_ptr<TensorOp> Build() = 0;
|
||||
|
||||
virtual bool ValidateParams() = 0;
|
||||
};
|
||||
|
||||
// Transform operations for performing computer vision.
|
||||
namespace vision {
|
||||
|
||||
class NormalizeOperation;
|
||||
class DecodeOperation;
|
||||
class ResizeOperation;
|
||||
class RandomCropOperation;
|
||||
class CenterCropOperation;
|
||||
class UniformAugOperation;
|
||||
class RandomHorizontalFlipOperation;
|
||||
class RandomVerticalFlipOperation;
|
||||
class RandomRotationOperation;
|
||||
class PadOperation;
|
||||
class CutOutOperation;
|
||||
class RandomColorAdjustOperation;
|
||||
|
||||
/// \brief Function to create a Normalize TensorOperation.
|
||||
/// \notes Normalize the input image with respect to mean and standard deviation.
|
||||
/// \param[in] mean - a vector of mean values for each channel, w.r.t channel order.
|
||||
/// \param[in] std - a vector of standard deviations for each channel, w.r.t. channel order.
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std);
|
||||
|
||||
/// \brief Function to create a Decode TensorOperation.
|
||||
/// \notes Decode the input image in RGB mode.
|
||||
/// \param[in] rgb - a boolean of whether to decode in RGB mode or not.
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<DecodeOperation> Decode(bool rgb = true);
|
||||
|
||||
/// \brief Function to create a Resize TensorOperation.
|
||||
/// \notes Resize the input image to the given size.
|
||||
/// \param[in] size - a vector representing the output size of the resized image.
|
||||
/// If size is a single value, the image will be resized to this value with
|
||||
/// the same image aspect ratio. If size has 2 values, it should be (height, width).
|
||||
/// \param[in] interpolation An enum for the mode of interpolation
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size,
|
||||
InterpolationMode interpolation = InterpolationMode::kLinear);
|
||||
|
||||
/// \brief Function to create a RandomCrop TensorOperation.
|
||||
/// \notes Crop the input image at a random location.
|
||||
/// \param[in] size - a vector representing the output size of the cropped image.
|
||||
/// If size is a single value, a square crop of size (size, size) is returned.
|
||||
/// If size has 2 values, it should be (height, width).
|
||||
/// \param[in] padding - a vector with the value of pixels to pad the image. If 4 values are provided,
|
||||
/// it pads the left, top, right and bottom respectively.
|
||||
/// \param[in] pad_if_needed - a boolean whether to pad the image if either side is smaller than
|
||||
/// the given output size.
|
||||
/// \param[in] fill_value - a vector representing the pixel intensity of the borders, it is used to
|
||||
/// fill R, G, B channels respectively.
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
|
||||
bool pad_if_needed = false,
|
||||
std::vector<uint8_t> fill_value = {0, 0, 0});
|
||||
|
||||
/// \brief Function to create a CenterCrop TensorOperation.
|
||||
/// \notes Crops the input image at the center to the given size.
|
||||
/// \param[in] size - a vector representing the output size of the cropped image.
|
||||
/// If size is a single value, a square crop of size (size, size) is returned.
|
||||
/// If size has 2 values, it should be (height, width).
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size);
|
||||
|
||||
/// \brief Function to create a UniformAugment TensorOperation.
|
||||
/// \notes Tensor operation to perform randomly selected augmentation.
|
||||
/// \param[in] operations - a vector of TensorOperation operations.
|
||||
/// \param[in] num_ops - integer representing the number of OPs to be selected and applied.
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations,
|
||||
int32_t num_ops = 2);
|
||||
|
||||
/// \brief Function to create a RandomHorizontalFlip TensorOperation.
|
||||
/// \notes Tensor operation to perform random horizontal flip.
|
||||
/// \param[in] prob - float representing the probability of flip.
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob = 0.5);
|
||||
|
||||
/// \brief Function to create a RandomVerticalFlip TensorOperation.
|
||||
/// \notes Tensor operation to perform random vertical flip.
|
||||
/// \param[in] prob - float representing the probability of flip.
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob = 0.5);
|
||||
|
||||
/// \brief Function to create a RandomRotation TensorOp
|
||||
/// \notes Rotates the image according to parameters
|
||||
/// \param[in] degrees A float vector size 2, representing the starting and ending degree
|
||||
/// \param[in] resample An enum for the mode of interpolation
|
||||
/// \param[in] expand A boolean representing whether the image is expanded after rotation
|
||||
/// \param[in] center A float vector size 2, representing the x and y center of rotation.
|
||||
/// \param[in] fill_value A uint8_t vector size 3, representing the rgb value of the fill color
|
||||
/// \return Shared pointer to the current TensorOp
|
||||
std::shared_ptr<RandomRotationOperation> RandomRotation(
|
||||
std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false,
|
||||
std::vector<float> center = {-1, -1}, std::vector<uint8_t> fill_value = {0, 0, 0});
|
||||
|
||||
/// \brief Function to create a Pad TensorOp
|
||||
/// \notes Pads the image according to padding parameters
|
||||
/// \param[in] padding A vector representing the number of pixels to pad the image
|
||||
/// If vector has one value, it pads all sides of the image with that value
|
||||
/// If vector has two values, it pads left and right with the first and
|
||||
/// top and bottom with the second value
|
||||
/// If vector has four values, it pads left, top, right, and bottom with
|
||||
/// those values respectively
|
||||
/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
|
||||
/// BorderType.kConstant. If 3 values are provided,
|
||||
/// it is used to fill R, G, B channels respectively
|
||||
/// \param[in] padding_mode The method of padding (default=BorderType.kConstant)
|
||||
/// Can be any of
|
||||
/// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric]
|
||||
/// - BorderType.kConstant, means it fills the border with constant values
|
||||
/// - BorderType.kEdge, means it pads with the last value on the edge
|
||||
/// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge
|
||||
/// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge
|
||||
/// \return Shared pointer to the current TensorOp
|
||||
std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
|
||||
BorderType padding_mode = BorderType::kConstant);
|
||||
|
||||
/// \brief Function to create a CutOut TensorOp
|
||||
/// \notes Randomly cut (mask) out a given number of square patches from the input image
|
||||
/// \param[in] length Integer representing the side length of each square patch
|
||||
/// \param[in] num_patches Integer representing the number of patches to be cut out of an image
|
||||
/// \return Shared pointer to the current TensorOp
|
||||
std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches = 1);
|
||||
|
||||
/// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image
|
||||
/// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values
|
||||
/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
|
||||
/// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values
|
||||
/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
|
||||
/// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values
|
||||
/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
|
||||
/// \param[in] hue Hue adjustment factor. Must be a vector of one or two values
|
||||
/// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
|
||||
/// Default value is {0, 0}
|
||||
/// \return Shared pointer to the current TensorOp
|
||||
std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0},
|
||||
std::vector<float> contrast = {1.0, 1.0},
|
||||
std::vector<float> saturation = {1.0, 1.0},
|
||||
std::vector<float> hue = {0.0, 0.0});
|
||||
|
||||
/* ####################################### Derived TensorOperation classes ################################# */
|
||||
|
||||
class NormalizeOperation : public TensorOperation {
|
||||
public:
|
||||
NormalizeOperation(std::vector<float> mean, std::vector<float> std);
|
||||
|
||||
~NormalizeOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<float> mean_;
|
||||
std::vector<float> std_;
|
||||
};
|
||||
|
||||
class DecodeOperation : public TensorOperation {
|
||||
public:
|
||||
explicit DecodeOperation(bool rgb = true);
|
||||
|
||||
~DecodeOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
bool rgb_;
|
||||
};
|
||||
|
||||
class ResizeOperation : public TensorOperation {
|
||||
public:
|
||||
explicit ResizeOperation(std::vector<int32_t> size,
|
||||
InterpolationMode interpolation_mode = InterpolationMode::kLinear);
|
||||
|
||||
~ResizeOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<int32_t> size_;
|
||||
InterpolationMode interpolation_;
|
||||
};
|
||||
|
||||
class RandomCropOperation : public TensorOperation {
|
||||
public:
|
||||
RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
|
||||
bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0});
|
||||
|
||||
~RandomCropOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<int32_t> size_;
|
||||
std::vector<int32_t> padding_;
|
||||
bool pad_if_needed_;
|
||||
std::vector<uint8_t> fill_value_;
|
||||
};
|
||||
|
||||
class CenterCropOperation : public TensorOperation {
|
||||
public:
|
||||
explicit CenterCropOperation(std::vector<int32_t> size);
|
||||
|
||||
~CenterCropOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<int32_t> size_;
|
||||
};
|
||||
|
||||
class UniformAugOperation : public TensorOperation {
|
||||
public:
|
||||
explicit UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops = 2);
|
||||
|
||||
~UniformAugOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<TensorOperation>> operations_;
|
||||
int32_t num_ops_;
|
||||
};
|
||||
|
||||
class RandomHorizontalFlipOperation : public TensorOperation {
|
||||
public:
|
||||
explicit RandomHorizontalFlipOperation(float probability = 0.5);
|
||||
|
||||
~RandomHorizontalFlipOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
float probability_;
|
||||
};
|
||||
|
||||
class RandomVerticalFlipOperation : public TensorOperation {
|
||||
public:
|
||||
explicit RandomVerticalFlipOperation(float probability = 0.5);
|
||||
|
||||
~RandomVerticalFlipOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
float probability_;
|
||||
};
|
||||
|
||||
class RandomRotationOperation : public TensorOperation {
|
||||
public:
|
||||
RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, bool expand,
|
||||
std::vector<float> center, std::vector<uint8_t> fill_value);
|
||||
|
||||
~RandomRotationOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<float> degrees_;
|
||||
InterpolationMode interpolation_mode_;
|
||||
std::vector<float> center_;
|
||||
bool expand_;
|
||||
std::vector<uint8_t> fill_value_;
|
||||
};
|
||||
|
||||
class PadOperation : public TensorOperation {
|
||||
public:
|
||||
PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
|
||||
BorderType padding_mode = BorderType::kConstant);
|
||||
|
||||
~PadOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<int32_t> padding_;
|
||||
std::vector<uint8_t> fill_value_;
|
||||
BorderType padding_mode_;
|
||||
};
|
||||
|
||||
class CutOutOperation : public TensorOperation {
|
||||
public:
|
||||
explicit CutOutOperation(int32_t length, int32_t num_patches = 1);
|
||||
|
||||
~CutOutOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
int32_t length_;
|
||||
int32_t num_patches_;
|
||||
};
|
||||
|
||||
class RandomColorAdjustOperation : public TensorOperation {
|
||||
public:
|
||||
RandomColorAdjustOperation(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
|
||||
std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});
|
||||
|
||||
~RandomColorAdjustOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
bool ValidateParams() override;
|
||||
|
||||
private:
|
||||
std::vector<float> brightness_;
|
||||
std::vector<float> contrast_;
|
||||
std::vector<float> saturation_;
|
||||
std::vector<float> hue_;
|
||||
};
|
||||
} // namespace vision
|
||||
} // namespace api
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // DATASET_API_TRANSFORMS_H_
|
|
@ -0,0 +1 @@
|
|||
../../../utils/log_adapter.h
|
|
@ -0,0 +1 @@
|
|||
../../../utils/overload.h
|
|
@ -2,7 +2,13 @@ add_subdirectory(image)
|
|||
add_subdirectory(data)
|
||||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
add_library(kernels OBJECT
|
||||
py_func_op.cc
|
||||
tensor_op.cc)
|
||||
target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS})
|
||||
# tensor_op.cc always belongs to the kernels object library. py_func_op.cc is only
# compiled, and the pybind11 headers only added, when ENABLE_PYTHON is set.
set(_KERNELS_SRC_FILES tensor_op.cc)
if(ENABLE_PYTHON)
    # Keep py_func_op.cc ahead of tensor_op.cc, as in the explicit source list.
    list(INSERT _KERNELS_SRC_FILES 0 py_func_op.cc)
endif()

add_library(kernels OBJECT ${_KERNELS_SRC_FILES})

if(ENABLE_PYTHON)
    target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS})
endif()
|
||||
|
||||
|
|
|
@ -23,7 +23,9 @@
|
|||
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/core/data_type.h"
|
||||
#ifdef ENABLE_PYTHON
|
||||
#include "dataset/core/pybind_support.h"
|
||||
#endif
|
||||
#include "dataset/core/tensor.h"
|
||||
#include "dataset/core/tensor_shape.h"
|
||||
#include "dataset/kernels/data/type_cast_op.h"
|
||||
|
|
|
@ -729,7 +729,6 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
|
|||
int num_channels = input_cv->shape()[2];
|
||||
if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2);
|
||||
*output = std::static_pointer_cast<Tensor>(output_cv);
|
||||
|
||||
return Status::OK();
|
||||
} catch (const cv::Exception &e) {
|
||||
RETURN_STATUS_UNEXPECTED("Unexpected error in pad");
|
||||
|
|
|
@ -35,10 +35,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };
|
||||
|
||||
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
|
||||
|
||||
void JpegErrorExitCustom(j_common_ptr cinfo);
|
||||
|
||||
struct JpegErrorManagerCustom {
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "dataset/kernels/image/pad_op.h"
|
||||
|
||||
#include "dataset/kernels/image/image_utils.h"
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/util/status.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
#include "dataset/core/tensor.h"
|
||||
#include "dataset/kernels/tensor_op.h"
|
||||
#include "dataset/kernels/image/image_utils.h"
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/util/status.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#include "dataset/kernels/image/image_utils.h"
|
||||
#include "dataset/util/status.h"
|
||||
#include "dataset/core/cv_tensor.h"
|
||||
#include "dataset/core/pybind_support.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
|
|
@ -16,8 +16,6 @@
|
|||
#ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_
|
||||
#define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_
|
||||
|
||||
#include <pybind11/numpy.h>
|
||||
#include <pybind11/stl.h>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <cstdlib>
|
||||
|
@ -26,8 +24,6 @@
|
|||
#include "dataset/kernels/tensor_op.h"
|
||||
#include "dataset/util/random.h"
|
||||
#include "dataset/util/status.h"
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl_bind.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace py = pybind11;
|
||||
|
||||
class NgramOp : public TensorOp {
|
||||
public:
|
||||
|
|
|
@ -32,7 +32,15 @@ if(ENABLE_MINDDATA)
|
|||
endif()
|
||||
# fetch ut test files
|
||||
if(ENABLE_MINDDATA)
|
||||
file(GLOB_RECURSE UT_SRCS ./*.cc)
|
||||
file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./*.cc)
|
||||
if(NOT ENABLE_PYTHON)
|
||||
set(PYTHON_RELATED_SRCS
|
||||
dataset/filter_op_test.cc
|
||||
dataset/voc_op_test.cc
|
||||
dataset/manifest_op_test.cc
|
||||
)
|
||||
list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS})
|
||||
endif()
|
||||
else()
|
||||
file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc)
|
||||
foreach(OBJ ${TEMP_UT_SRCS})
|
||||
|
|
|
@ -90,6 +90,7 @@ SET(DE_UT_SRCS
|
|||
concatenate_op_test.cc
|
||||
cyclic_array_test.cc
|
||||
perf_data_test.cc
|
||||
c_api_test.cc
|
||||
)
|
||||
|
||||
add_executable(de_ut_tests ${DE_UT_SRCS})
|
||||
|
|
|
@ -0,0 +1,771 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "utils/log_adapter.h"
|
||||
#include "common/utils.h"
|
||||
#include "common/common.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "securec.h"
|
||||
#include "dataset/include/datasets.h"
|
||||
#include "dataset/include/status.h"
|
||||
#include "dataset/include/transforms.h"
|
||||
#include "dataset/include/iterator.h"
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/include/samplers.h"
|
||||
|
||||
using namespace mindspore::dataset::api;
|
||||
using mindspore::MsLogLevel::ERROR;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
using mindspore::dataset::Tensor;
|
||||
using mindspore::dataset::Status;
|
||||
using mindspore::dataset::BorderType;
|
||||
|
||||
|
||||
// Fixture for the C++ dataset API pipeline tests. It adds no members of its own; everything the
// tests use (e.g. datasets_root_path_) is inherited from UT::DatasetOpTesting.
class MindDataTestPipeline : public UT::DatasetOpTesting {};
|
||||
|
||||
|
||||
// Builds Mnist -> Repeat -> Batch through the C++ API and checks the number of rows produced.
// Pointer checks use ASSERT_* (fatal) rather than EXPECT_*: every pointer is dereferenced right
// after its check, so continuing past a null result would crash the whole test binary instead of
// reporting a single failed test.
TEST_F(MindDataTestPipeline, TestBatchAndRepeat) {
  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 10));
  ASSERT_TRUE(ds != nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_TRUE(ds != nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_TRUE(ds != nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_TRUE(iter != nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // 10 samples * 2 repeats / batch size 2 = 10 rows
  EXPECT_EQ(i, 10U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds Mnist -> Repeat -> Map(Resize, CenterCrop) -> Batch through the C++ API and checks the
// number of rows produced.
// Pointer checks use ASSERT_* (fatal) rather than EXPECT_*: every pointer is dereferenced or
// passed on right after its check, so continuing past a null result would crash the whole test
// binary instead of reporting a single failed test.
TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) {
  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20));
  ASSERT_TRUE(ds != nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_TRUE(ds != nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorOperation> resize_op = vision::Resize({30, 30});
  ASSERT_TRUE(resize_op != nullptr);

  std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop({16, 16});
  ASSERT_TRUE(center_crop_op != nullptr);

  // Create a Map operation on ds
  ds = ds->Map({resize_op, center_crop_op});
  ASSERT_TRUE(ds != nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_TRUE(ds != nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_TRUE(iter != nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // 20 samples * 2 repeats / batch size 1 = 40 rows
  EXPECT_EQ(i, 40U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds Mnist -> Repeat(1) -> Map[Resize, UniformAugment(RandomCrop, CenterCrop)]
// and checks that iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestUniformAugWithOps) {
  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 20));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 1;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorOperation> resize_op = vision::Resize({30, 30});
  ASSERT_NE(resize_op, nullptr);

  std::shared_ptr<TensorOperation> random_crop_op = vision::RandomCrop({28, 28});
  ASSERT_NE(random_crop_op, nullptr);

  std::shared_ptr<TensorOperation> center_crop_op = vision::CenterCrop({16, 16});
  ASSERT_NE(center_crop_op, nullptr);

  // The candidate ops above are wrapped into a single UniformAugment op.
  std::shared_ptr<TensorOperation> uniform_aug_op = vision::UniformAugment({random_crop_op, center_crop_op}, 2);
  ASSERT_NE(uniform_aug_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({resize_op, uniform_aug_op});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Repeat(2) -> Map[RandomVerticalFlip, RandomHorizontalFlip]
// -> Batch(1) and checks that iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestRandomFlip) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorOperation> random_vertical_flip_op = vision::RandomVerticalFlip(0.5);
  ASSERT_NE(random_vertical_flip_op, nullptr);

  std::shared_ptr<TensorOperation> random_horizontal_flip_op = vision::RandomHorizontalFlip(0.5);
  ASSERT_NE(random_horizontal_flip_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({random_vertical_flip_op, random_horizontal_flip_op});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Repeat(2) -> Batch(2) and checks that iterating the
// pipeline yields 10 batched rows.
TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 10U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Constructs each sampler type in turn to check that the factories return an
// object, then runs ImageFolder -> Repeat(2) -> Batch(2) with the last sampler
// created (SubsetRandomSampler) and checks that 12 batched rows are produced.
TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  // Each assignment replaces the previous sampler; only the final one is used below.
  std::shared_ptr<SamplerObj> sampl = DistributedSampler(2, 1);
  EXPECT_NE(sampl, nullptr);

  sampl = PKSampler(3);
  EXPECT_NE(sampl, nullptr);

  sampl = RandomSampler(false, 12);
  EXPECT_NE(sampl, nullptr);

  sampl = SequentialSampler(0, 12);
  EXPECT_NE(sampl, nullptr);

  std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  sampl = WeightedRandomSampler(weights, 12);
  EXPECT_NE(sampl, nullptr);

  std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  sampl = SubsetRandomSampler(indices);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 12U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Repeat(2) -> Map[three Pad variants] -> Batch(1) and
// checks that iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestPad) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops: explicit border types and the default overload
  std::shared_ptr<TensorOperation> pad_op1 = vision::Pad({1, 2, 3, 4}, {0}, BorderType::kSymmetric);
  ASSERT_NE(pad_op1, nullptr);

  std::shared_ptr<TensorOperation> pad_op2 = vision::Pad({1}, {1, 1, 1}, BorderType::kEdge);
  ASSERT_NE(pad_op2, nullptr);

  std::shared_ptr<TensorOperation> pad_op3 = vision::Pad({1, 4});
  ASSERT_NE(pad_op3, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({pad_op1, pad_op2, pad_op3});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Repeat(2) -> Map[CutOut(30, 5), CutOut(30)] -> Batch(1)
// and checks that iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestCutOut) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorOperation> cut_out1 = vision::CutOut(30, 5);
  ASSERT_NE(cut_out1, nullptr);

  std::shared_ptr<TensorOperation> cut_out2 = vision::CutOut(30);
  ASSERT_NE(cut_out2, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({cut_out1, cut_out2});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Repeat(2) -> Map[Normalize] -> Batch(1) and checks
// that iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestNormalize) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorOperation> normalize = vision::Normalize({121.0, 115.0, 100.0}, {70.0, 68.0, 71.0});
  ASSERT_NE(normalize, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({normalize});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder (created with its second argument false, unlike most
// sibling tests) -> Repeat(2) -> Map[Decode(true)] -> Batch(1) and checks that
// iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestDecode) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorOperation> decode = vision::Decode(true);
  ASSERT_NE(decode, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({decode});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // i is uint64_t; an unsigned literal avoids a signed/unsigned comparison
  // inside the EXPECT_EQ template.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Shuffle(10) -> Repeat(2) -> Batch(2) and checks that
// iterating the pipeline yields 10 batched rows.
TEST_F(MindDataTestPipeline, TestShuffleDataset) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Shuffle operation on ds
  int32_t shuffle_size = 10;
  ds = ds->Shuffle(shuffle_size);
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 10U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds Cifar10 -> Repeat(2) -> Batch(2) and checks that iterating the
// pipeline yields 10 batched rows.
TEST_F(MindDataTestPipeline, TestCifar10Dataset) {
  // Create a Cifar10 Dataset
  std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds = Cifar10(folder_path, 0, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 10U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Repeat(2) -> Map[four RandomColorAdjust variants]
// -> Batch(1) and checks that iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestRandomColorAdjust) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops: single-value arguments, two-value
  // arguments (equal and distinct bounds), and the all-defaults overload.
  std::shared_ptr<TensorOperation> random_color_adjust1 = vision::RandomColorAdjust({1.0}, {0.0}, {0.5}, {0.5});
  ASSERT_NE(random_color_adjust1, nullptr);

  std::shared_ptr<TensorOperation> random_color_adjust2 =
    vision::RandomColorAdjust({1.0, 1.0}, {0.0, 0.0}, {0.5, 0.5}, {0.5, 0.5});
  ASSERT_NE(random_color_adjust2, nullptr);

  std::shared_ptr<TensorOperation> random_color_adjust3 =
    vision::RandomColorAdjust({0.5, 1.0}, {0.0, 0.5}, {0.25, 0.5}, {0.25, 0.5});
  ASSERT_NE(random_color_adjust3, nullptr);

  std::shared_ptr<TensorOperation> random_color_adjust4 = vision::RandomColorAdjust();
  ASSERT_NE(random_color_adjust4, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({random_color_adjust1, random_color_adjust2, random_color_adjust3, random_color_adjust4});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Repeat(2) -> Map[RandomRotation({-180, 180})] -> Batch(1)
// and checks that iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestRandomRotation) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorOperation> random_rotation_op = vision::RandomRotation({-180, 180});
  ASSERT_NE(random_rotation_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({random_rotation_op});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Builds ImageFolder -> Repeat(2) -> Map[RandomVerticalFlip] (with project
// columns {"image", "label"}) -> Project({"label"}) -> Batch(1) and checks
// that iterating the pipeline yields 20 rows.
TEST_F(MindDataTestPipeline, TestProjectMap) {
  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
  // Fatal check: ds is dereferenced by the very next pipeline step.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorOperation> random_vertical_flip_op = vision::RandomVerticalFlip(0.5);
  ASSERT_NE(random_vertical_flip_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
  ASSERT_NE(ds, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"label"};
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    // Only "label" is requested from Project above, so read that column.
    // Looking up "image" with operator[] would insert a null shared_ptr
    // and then dereference it.
    auto label = row["label"];
    MS_LOG(INFO) << "Tensor label shape: " << label->shape();
    iter->GetNextRow(&row);
  }

  // Unsigned literal keeps the comparison free of signed/unsigned mixing.
  EXPECT_EQ(i, 20U);

  // Manually terminate the pipeline
  iter->Stop();
}
|
|
@ -23,8 +23,6 @@
|
|||
|
||||
using namespace mindspore::dataset;
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
class MindDataTestDatatype : public UT::Common {
|
||||
public:
|
||||
MindDataTestDatatype() = default;
|
||||
|
|
Loading…
Reference in New Issue