From bd5a777f810454c076bc69838cdf91e1cf2f8492 Mon Sep 17 00:00:00 2001 From: ervinzhang Date: Fri, 10 Jul 2020 16:26:00 -0400 Subject: [PATCH] introducing new C++ API --- CMakeLists.txt | 4 + build.sh | 11 +- cmake/options.cmake | 1 + mindspore/ccsrc/dataset/CMakeLists.txt | 14 +- mindspore/ccsrc/dataset/api/CMakeLists.txt | 17 +- mindspore/ccsrc/dataset/api/datasets.cc | 446 ++++++++++ mindspore/ccsrc/dataset/api/iterator.cc | 101 +++ .../ccsrc/dataset/api/python_bindings.cc | 4 +- mindspore/ccsrc/dataset/api/samplers.cc | 224 +++++ mindspore/ccsrc/dataset/api/transforms.cc | 491 +++++++++++ mindspore/ccsrc/dataset/core/CMakeLists.txt | 17 +- mindspore/ccsrc/dataset/core/client.h | 14 +- mindspore/ccsrc/dataset/core/constants.h | 6 + mindspore/ccsrc/dataset/core/data_type.cc | 9 +- mindspore/ccsrc/dataset/core/data_type.h | 78 +- mindspore/ccsrc/dataset/core/tensor.cc | 32 +- mindspore/ccsrc/dataset/core/tensor.h | 38 +- mindspore/ccsrc/dataset/core/tensor_row.cc | 1 - mindspore/ccsrc/dataset/core/tensor_shape.cc | 4 + mindspore/ccsrc/dataset/core/tensor_shape.h | 117 +-- .../dataset/engine/datasetops/CMakeLists.txt | 21 +- .../dataset/engine/datasetops/batch_op.cc | 32 +- .../dataset/engine/datasetops/batch_op.h | 16 + .../engine/datasetops/source/CMakeLists.txt | 27 +- .../datasetops/source/sampler/CMakeLists.txt | 13 +- .../datasetops/source/sampler/sampler.cc | 2 + .../datasetops/source/sampler/sampler.h | 4 +- mindspore/ccsrc/dataset/engine/gnn/graph.cc | 2 + mindspore/ccsrc/dataset/engine/gnn/graph.h | 2 + mindspore/ccsrc/dataset/engine/opt/pass.cc | 28 +- mindspore/ccsrc/dataset/engine/opt/pass.h | 20 +- .../dataset/engine/opt/util/printer_pass.cc | 25 +- .../dataset/engine/opt/util/printer_pass.h | 10 +- .../dataset/include/dataset/core/constants.h | 1 + .../dataset/include/dataset/core/data_type.h | 1 + .../include/dataset/core/tensor_shape.h | 1 + .../dataset/include/dataset/util/status.h | 1 + mindspore/ccsrc/dataset/include/datasets.h | 357 ++++++++ mindspore/ccsrc/dataset/include/iterator.h | 115 +++ mindspore/ccsrc/dataset/include/samplers.h | 199 +++++ mindspore/ccsrc/dataset/include/status.h | 1 + mindspore/ccsrc/dataset/include/tensor.h | 1 + mindspore/ccsrc/dataset/include/transforms.h | 380 +++++++++ .../ccsrc/dataset/include/utils/log_adapter.h | 1 + .../ccsrc/dataset/include/utils/overload.h | 1 + .../ccsrc/dataset/kernels/CMakeLists.txt | 14 +- .../ccsrc/dataset/kernels/data/data_utils.cc | 2 + .../dataset/kernels/image/image_utils.cc | 1 - .../ccsrc/dataset/kernels/image/image_utils.h | 4 - .../ccsrc/dataset/kernels/image/pad_op.cc | 1 + .../ccsrc/dataset/kernels/image/pad_op.h | 2 +- .../random_horizontal_flip_with_bbox_op.cc | 1 - .../random_horizontal_flip_with_bbox_op.h | 4 - .../ccsrc/dataset/text/kernels/ngram_op.h | 1 - tests/ut/cpp/CMakeLists.txt | 10 +- tests/ut/cpp/dataset/CMakeLists.txt | 1 + tests/ut/cpp/dataset/c_api_test.cc | 771 ++++++++++++++++++ tests/ut/cpp/dataset/datatype_test.cc | 2 - 58 files changed, 3504 insertions(+), 200 deletions(-) create mode 100644 mindspore/ccsrc/dataset/api/datasets.cc create mode 100644 mindspore/ccsrc/dataset/api/iterator.cc create mode 100644 mindspore/ccsrc/dataset/api/samplers.cc create mode 100644 mindspore/ccsrc/dataset/api/transforms.cc create mode 120000 mindspore/ccsrc/dataset/include/dataset/core/constants.h create mode 120000 mindspore/ccsrc/dataset/include/dataset/core/data_type.h create mode 120000 mindspore/ccsrc/dataset/include/dataset/core/tensor_shape.h create mode 120000 mindspore/ccsrc/dataset/include/dataset/util/status.h create mode 100644 mindspore/ccsrc/dataset/include/datasets.h create mode 100644 mindspore/ccsrc/dataset/include/iterator.h create mode 100644 mindspore/ccsrc/dataset/include/samplers.h create mode 120000 mindspore/ccsrc/dataset/include/status.h create mode 120000 mindspore/ccsrc/dataset/include/tensor.h create mode 100644 mindspore/ccsrc/dataset/include/transforms.h create mode 120000 mindspore/ccsrc/dataset/include/utils/log_adapter.h create mode 120000 mindspore/ccsrc/dataset/include/utils/overload.h create mode 100644 tests/ut/cpp/dataset/c_api_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 987e4ae709c..c4da105cac2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,10 @@ else() set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") endif() +if (ENABLE_PYTHON) + add_compile_definitions(ENABLE_PYTHON) +endif() + set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC") diff --git a/build.sh b/build.sh index 059478b9afe..428743f0ffb 100755 --- a/build.sh +++ b/build.sh @@ -25,7 +25,7 @@ usage() echo "Usage:" echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" - echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]" + echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]" echo "" echo "Options:" echo " -d Debug mode" @@ -56,6 +56,7 @@ usage() echo " -s Enable serving module, default off" echo " -B Enable debugger, default off" echo " -E Enable IBVERBS for parameter server, default off" + echo " -l Compile with python dependency, default on" } # check value of input is 'on' or 'off' @@ -98,9 +99,10 @@ checkopts() ENABLE_SERVING="off" ENABLE_DEBUGGER="off" ENABLE_IBVERBS="off" + ENABLE_PYTHON="on" # Process the options - while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt + while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt do OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') case "${opt}" in @@ -151,6 +153,10 @@ checkopts() check_on_off $OPTARG p ENABLE_PROFILE="$OPTARG" ;; + l) + check_on_off $OPTARG l + ENABLE_PYTHON="$OPTARG" + ;; i) INC_BUILD="on" ;; @@ -316,6 +322,7 @@ build_mindspore() CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON" fi CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}" + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}" if [[ "X$ENABLE_MPI" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON" fi diff --git a/cmake/options.cmake b/cmake/options.cmake index 18db942d681..b01c623377d 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF) option(ENABLE_AKG "enable akg" OFF) option(ENABLE_DEBUGGER "enable debugger" OFF) option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF) +option(ENABLE_PYTHON "Enable python" ON) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if (WIN32) diff --git a/mindspore/ccsrc/dataset/CMakeLists.txt b/mindspore/ccsrc/dataset/CMakeLists.txt index 9238be93f29..8d7da15b223 100644 --- a/mindspore/ccsrc/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/CMakeLists.txt @@ -39,6 +39,7 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform) include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include) +include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/dataset/include) ###################################################################### ####################### Flags ######################################## @@ -67,7 +68,10 @@ add_dependencies(engine-gnn core) add_dependencies(engine core) add_dependencies(text core) add_dependencies(text-kernels core) -add_dependencies(APItoPython core) +add_dependencies(cpp-API core) +if (ENABLE_PYTHON) + add_dependencies(APItoPython core) +endif() if (ENABLE_TDTQUE) add_dependencies(engine-tdt core) endif () @@ -78,7 +82,7 @@ set(submodules $ $ $ - $ + $ $ $ $ @@ -90,6 +94,12 @@ set(submodules $ ) +if (ENABLE_PYTHON) + set(submodules + ${submodules} + $) +endif() + if (ENABLE_TDTQUE) add_library(_c_dataengine SHARED ${submodules} $) else () diff --git a/mindspore/ccsrc/dataset/api/CMakeLists.txt b/mindspore/ccsrc/dataset/api/CMakeLists.txt index 194aeed4572..ae0b9cc28ed 100644 --- a/mindspore/ccsrc/dataset/api/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/api/CMakeLists.txt @@ -1,7 +1,16 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(APItoPython OBJECT - de_pipeline.cc - python_bindings.cc +if (ENABLE_PYTHON) + add_library(APItoPython OBJECT + de_pipeline.cc + python_bindings.cc + ) + target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) +endif() + +add_library(cpp-API OBJECT + datasets.cc + iterator.cc + transforms.cc + samplers.cc ) -target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS}) diff --git a/mindspore/ccsrc/dataset/api/datasets.cc b/mindspore/ccsrc/dataset/api/datasets.cc new file mode 100644 index 00000000000..5684e6770aa --- /dev/null +++ b/mindspore/ccsrc/dataset/api/datasets.cc @@ -0,0 +1,446 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "dataset/include/datasets.h" +#include "dataset/include/transforms.h" +#include "dataset/include/samplers.h" +#include "dataset/engine/dataset_iterator.h" +#include "dataset/engine/datasetops/source/image_folder_op.h" +#include "dataset/engine/datasetops/source/mnist_op.h" +#include "dataset/engine/datasetops/source/cifar_op.h" +#include "dataset/engine/datasetops/batch_op.h" +#include "dataset/engine/datasetops/map_op.h" +#include "dataset/engine/datasetops/repeat_op.h" +#include "dataset/engine/datasetops/shuffle_op.h" +#include "dataset/engine/datasetops/project_op.h" +#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "dataset/engine/datasetops/source/sampler/random_sampler.h" + +#include "dataset/core/config_manager.h" +#include "dataset/util/random.h" + +namespace mindspore { +namespace dataset { +namespace api { + +#define RETURN_NULL_IF_ERROR(_s) \ + do { \ + Status __rc = (_s); \ + if (__rc.IsError()) { \ + return nullptr; \ + } \ + } while (false) + +// Function to create the iterator, which will build and launch the execution tree. +std::shared_ptr Dataset::CreateIterator() { + std::shared_ptr iter; + try { + iter = std::make_shared(); + Status rc = iter->BuildAndLaunchTree(shared_from_this()); + if (rc.IsError()) { + MS_LOG(ERROR) << "CreateIterator failed."; + return nullptr; + } + + return iter; + } catch (const std::exception &err) { + MS_LOG(ERROR) << "CreateIterator: Iterator exception caught: " << err.what(); + return nullptr; + } + + return iter; +} + +// Constructor +Dataset::Dataset() { + // Fetch some default value from config manager + std::shared_ptr cfg = GlobalContext::config_manager(); + num_workers_ = cfg->num_parallel_workers(); + rows_per_buffer_ = cfg->rows_per_buffer(); + connector_que_size_ = cfg->op_connector_size(); +} + +// Function to create a ImageFolderDataset. +std::shared_ptr ImageFolder(std::string dataset_dir, bool decode, + std::shared_ptr sampler, std::set extensions, + std::map class_indexing) { + // This arg is exist in ImageFolderOp, but not externalized (in Python API). The default value is false. + bool recursive = false; + + // Create logical representation of ImageFolderDataset. + auto ds = std::make_shared(dataset_dir, decode, sampler, recursive, extensions, class_indexing); + + // Call derived class validation method. + return ds->ValidateParams() ? ds : nullptr; +} + +// Function to create a MnistDataset. +std::shared_ptr Mnist(std::string dataset_dir, std::shared_ptr sampler) { + auto ds = std::make_shared(dataset_dir, sampler); + + // Call derived class validation method. + return ds->ValidateParams() ? ds : nullptr; +} + +// Function to create a Cifar10Dataset. +std::shared_ptr Cifar10(const std::string &dataset_dir, int32_t num_samples, + std::shared_ptr sampler) { + auto ds = std::make_shared(dataset_dir, num_samples, sampler); + + // Call derived class validation method. + return ds->ValidateParams() ? ds : nullptr; +} + +// Function to create a Batch dataset +std::shared_ptr Dataset::Batch(int32_t batch_size, bool drop_remainder) { + // Default values + std::vector cols_to_map = {}; + std::map>> pad_map; + bool pad = false; + auto ds = std::make_shared(batch_size, drop_remainder, pad, cols_to_map, pad_map); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create Repeat dataset. +std::shared_ptr Dataset::Repeat(int32_t count) { + // Workaround for repeat == 1, do not inject repeat. + if (count == 1) { + return shared_from_this(); + } + + auto ds = std::make_shared(count); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a Map dataset. +std::shared_ptr Dataset::Map(std::vector> operations, + std::vector input_columns, + std::vector output_columns, + const std::vector &project_columns) { + auto ds = std::make_shared(operations, input_columns, output_columns, project_columns); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a ShuffleOp +std::shared_ptr Dataset::Shuffle(int32_t shuffle_size) { + // Pass in reshuffle_each_epoch with true + auto ds = std::make_shared(shuffle_size, true); + + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Function to create a ProjectDataset. +std::shared_ptr Dataset::Project(const std::vector &columns) { + auto ds = std::make_shared(columns); + // Call derived class validation method. + if (!ds->ValidateParams()) { + return nullptr; + } + + ds->children.push_back(shared_from_this()); + + return ds; +} + +// Helper function to create default RandomSampler. +std::shared_ptr CreateDefaultSampler() { + int32_t num_samples = 0; // 0 means to sample all ids. + bool replacement = false; + return std::make_shared(replacement, num_samples); +} + +/* ####################################### Derived Dataset classes ################################# */ + +ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr sampler, + bool recursive, std::set extensions, + std::map class_indexing) + : dataset_dir_(dataset_dir), + decode_(decode), + sampler_(sampler), + recursive_(recursive), + class_indexing_(class_indexing), + exts_(extensions) {} + +bool ImageFolderDataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + + return true; +} + +std::shared_ptr>> ImageFolderDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. + // This arg is exist in ImageFolderOp, but not externalized (in Python API). + std::unique_ptr schema = std::make_unique(); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar))); + node_ops.push_back(std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, + recursive_, decode_, exts_, class_indexing_, std::move(schema), + std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr sampler) + : dataset_dir_(dataset_dir), sampler_(sampler) {} + +bool MnistDataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + + return true; +} + +std::shared_ptr>> MnistDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler, i.e., RandomSampler. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. + auto schema = std::make_unique(); + RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); + + node_ops.push_back(std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, + std::move(schema), std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +BatchDataset::BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector cols_to_map, + std::map>> pad_map) + : batch_size_(batch_size), + drop_remainder_(drop_remainder), + pad_(pad), + cols_to_map_(cols_to_map), + pad_map_(pad_map) {} + +std::shared_ptr>> BatchDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + +#ifdef ENABLE_PYTHON + py::function noop; + node_ops.push_back(std::make_shared(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, + cols_to_map_, noop, noop, pad_map_)); +#else + node_ops.push_back(std::make_shared(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_, + cols_to_map_, pad_map_)); +#endif + return std::make_shared>>(node_ops); +} + +bool BatchDataset::ValidateParams() { + if (batch_size_ <= 0) { + return false; + } + + return true; +} + +RepeatDataset::RepeatDataset(uint32_t count) : repeat_count_(count) {} + +std::shared_ptr>> RepeatDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(repeat_count_)); + return std::make_shared>>(node_ops); +} + +bool RepeatDataset::ValidateParams() { + if (repeat_count_ <= 0) { + return false; + } + + return true; +} +MapDataset::MapDataset(std::vector> operations, std::vector input_columns, + std::vector output_columns, const std::vector &project_columns) + : operations_(operations), + input_columns_(input_columns), + output_columns_(output_columns), + project_columns_(project_columns) {} + +std::shared_ptr>> MapDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // Currently default is true, and this is not exposed to user. + bool perf_mode = true; + + std::vector> tensor_ops; + + // Build tensorOp from tensorOperation vector + // This is to ensure each iterator hold its own copy of the tensorOp objects. + (void)std::transform( + operations_.begin(), operations_.end(), std::back_inserter(tensor_ops), + [](std::shared_ptr operation) -> std::shared_ptr { return operation->Build(); }); + + // This parameter will be removed with next rebase + std::vector col_orders; + auto map_op = + std::make_shared(input_columns_, output_columns_, tensor_ops, num_workers_, connector_que_size_, perf_mode); + if (!project_columns_.empty()) { + auto project_op = std::make_shared(project_columns_); + node_ops.push_back(project_op); + } + + node_ops.push_back(map_op); + return std::make_shared>>(node_ops); +} + +bool MapDataset::ValidateParams() { + if (operations_.empty()) { + return false; + } + + return true; +} + +// Constructor for ShuffleDataset +ShuffleDataset::ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch) + : shuffle_size_(shuffle_size), shuffle_seed_(GetSeed()), reset_every_epoch_(reset_every_epoch) {} + +// Function to build the ShuffleOp +std::shared_ptr>> ShuffleDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_, + rows_per_buffer_)); + return std::make_shared>>(node_ops); +} + +// Function to validate the parameters for ShuffleDataset +bool ShuffleDataset::ValidateParams() { + if (shuffle_size_ <= 1) { + MS_LOG(ERROR) << "ShuffleDataset: Invalid input, shuffle_size: " << shuffle_size_; + return false; + } + + return true; +} + +// Constructor for Cifar10Dataset +Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr sampler) + : dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(sampler) {} + +bool Cifar10Dataset::ValidateParams() { + if (dataset_dir_.empty()) { + MS_LOG(ERROR) << "No dataset path is specified."; + return false; + } + if (num_samples_ < 0) { + MS_LOG(ERROR) << "Number of samples cannot be negative"; + return false; + } + return true; +} + +// Function to build CifarOp +std::shared_ptr>> Cifar10Dataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + // If user does not specify Sampler, create a default sampler based on the shuffle variable. + if (sampler_ == nullptr) { + sampler_ = CreateDefaultSampler(); + } + + // Do internal Schema generation. + auto schema = std::make_unique(); + RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1))); + TensorShape scalar = TensorShape::CreateScalar(); + RETURN_NULL_IF_ERROR( + schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); + + node_ops.push_back(std::make_shared(CifarOp::CifarType::kCifar10, num_workers_, rows_per_buffer_, + dataset_dir_, connector_que_size_, std::move(schema), + std::move(sampler_->Build()))); + return std::make_shared>>(node_ops); +} + +// Function to build ProjectOp +ProjectDataset::ProjectDataset(const std::vector &columns) : columns_(columns) {} + +bool ProjectDataset::ValidateParams() { + if (columns_.empty()) { + MS_LOG(ERROR) << "No columns are specified."; + return false; + } + return true; +} + +std::shared_ptr>> ProjectDataset::Build() { + // A vector containing shared pointer to the Dataset Ops that this object will create + std::vector> node_ops; + + node_ops.push_back(std::make_shared(columns_)); + return std::make_shared>>(node_ops); +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/api/iterator.cc b/mindspore/ccsrc/dataset/api/iterator.cc new file mode 100644 index 00000000000..3875dcf8aa3 --- /dev/null +++ b/mindspore/ccsrc/dataset/api/iterator.cc @@ -0,0 +1,101 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "dataset/include/iterator.h" +#include "dataset/core/client.h" +#include "dataset/include/datasets.h" + +namespace mindspore { +namespace dataset { +namespace api { + +// Get the next row from the data pipeline. +void Iterator::GetNextRow(TensorMap *row) { + Status rc = iterator_->GetNextAsMap(row); + if (rc.IsError()) { + MS_LOG(ERROR) << "GetNextRow: Failed to get next row."; + row->clear(); + } +} + +// Shut down the data pipeline. +void Iterator::Stop() { + // Releasing the iterator_ unique_ptre. This should trigger the destructor of iterator_. + iterator_.reset(); + + // Release ownership of tree_ shared pointer. This will decrement the ref count. + tree_.reset(); +} + +// Function to build and launch the execution tree. +Status Iterator::BuildAndLaunchTree(std::shared_ptr ds) { + // One time init + Status rc; + rc = GlobalInit(); + RETURN_IF_NOT_OK(rc); + + // Instantiate the execution tree + tree_ = std::make_shared(); + + // Iterative BFS converting Dataset tree into runtime Execution tree. + std::queue, std::shared_ptr>> q; + + if (ds != nullptr) { + // Convert the current root node. + auto root_op = ds->Build()->front(); + RETURN_UNEXPECTED_IF_NULL(root_op); + + RETURN_IF_NOT_OK(tree_->AssociateNode(root_op)); + + q.push(std::make_pair(ds, root_op)); + + // Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes) + while (!q.empty()) { + auto node_pair = q.front(); + q.pop(); + // Iterate through all the direct children of the first element in our BFS queue + for (auto child : node_pair.first->children) { + auto child_ops = child->Build(); + RETURN_UNEXPECTED_IF_NULL(child_ops); + auto node_op = node_pair.second; + // Iterate through all the DatasetOps returned by calling Build on the last Dataset object, associate them + // with the execution tree and add the child and parent relationship between the nodes + // Note that some Dataset objects might return more than one DatasetOps + // e.g. MapDataset will return MapOp and ProjectOp if project_columns is set for MapDataset + for (auto child_op : *child_ops) { + RETURN_IF_NOT_OK(tree_->AssociateNode(child_op)); + RETURN_IF_NOT_OK(node_op->AddChild(child_op)); + node_op = child_op; + } + // Add the child and the last element of the returned DatasetOps (which is now the leaf node in our current + // execution tree) to the BFS queue + q.push(std::make_pair(child, child_ops->back())); + } + } + RETURN_IF_NOT_OK(tree_->AssignRoot(root_op)); + } + + // Launch the execution tree. + RETURN_IF_NOT_OK(tree_->Prepare()); + RETURN_IF_NOT_OK(tree_->Launch()); + iterator_ = std::make_unique(tree_); + RETURN_UNEXPECTED_IF_NULL(iterator_); + + return rc; +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc index aa9f7af046d..f4c4f7f41de 100644 --- a/mindspore/ccsrc/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/dataset/api/python_bindings.cc @@ -297,7 +297,7 @@ void bindTensor(py::module *m) { })) .def_buffer([](Tensor &tensor) { py::buffer_info info; - THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); + THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); return info; }) .def("__str__", &Tensor::ToString) @@ -311,7 +311,7 @@ void bindTensor(py::module *m) { return res; } py::buffer_info info; - THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info)); + THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info)); return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t); }); diff --git a/mindspore/ccsrc/dataset/api/samplers.cc b/mindspore/ccsrc/dataset/api/samplers.cc new file mode 100644 index 00000000000..44d01c2f0c2 --- /dev/null +++ b/mindspore/ccsrc/dataset/api/samplers.cc @@ -0,0 +1,224 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dataset/include/samplers.h" +#include "dataset/engine/datasetops/source/sampler/sampler.h" +#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "dataset/engine/datasetops/source/sampler/pk_sampler.h" + +namespace mindspore { +namespace dataset { +namespace api { + +SamplerObj::SamplerObj() {} + +/// Function to create a Distributed Sampler. +std::shared_ptr DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle, + int64_t num_samples, uint32_t seed) { + auto sampler = std::make_shared(num_shards, shard_id, shuffle, num_samples, seed); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a PK Sampler. +std::shared_ptr PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) { + auto sampler = std::make_shared(num_val, shuffle, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Random Sampler. +std::shared_ptr RandomSampler(bool replacement, int64_t num_samples) { + auto sampler = std::make_shared(replacement, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Sequential Sampler. +std::shared_ptr SequentialSampler(int64_t start_index, int64_t num_samples) { + auto sampler = std::make_shared(start_index, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Subset Random Sampler. +std::shared_ptr SubsetRandomSampler(const std::vector &indices, int64_t num_samples) { + auto sampler = std::make_shared(indices, num_samples); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/// Function to create a Weighted Random Sampler. +std::shared_ptr WeightedRandomSampler(const std::vector &weights, int64_t num_samples, + bool replacement) { + auto sampler = std::make_shared(weights, num_samples, replacement); + // Input validation + if (!sampler->ValidateParams()) { + return nullptr; + } + return sampler; +} + +/* ####################################### Derived Sampler classes ################################# */ + +// DistributedSampler +DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, + uint32_t seed) + : num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {} + +bool DistributedSamplerObj::ValidateParams() { + if (num_shards_ <= 0) { + MS_LOG(ERROR) << "DistributedSampler: invalid num_shards: " << num_shards_; + return false; + } + + if (shard_id_ < 0 || shard_id_ >= num_shards_) { + MS_LOG(ERROR) << "DistributedSampler: invalid input, shard_id: " << shard_id_ << ", num_shards: " << num_shards_; + return false; + } + + if (num_samples_ < 0) { + MS_LOG(ERROR) << "DistributedSampler: invalid num_samples: " << num_samples_; + return false; + } + + return true; +} + +std::shared_ptr DistributedSamplerObj::Build() { + return std::make_shared(num_samples_, num_shards_, shard_id_, shuffle_, seed_); +} + +// PKSampler +PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples) + : num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {} + +bool PKSamplerObj::ValidateParams() { + if (num_val_ <= 0) { + MS_LOG(ERROR) << "PKSampler: invalid num_val: " << num_val_; + return false; + } + + if (num_samples_ < 0) { + MS_LOG(ERROR) << "PKSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr PKSamplerObj::Build() { + return std::make_shared(num_samples_, num_val_, shuffle_); +} + +// RandomSampler +RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples) + : replacement_(replacement), num_samples_(num_samples) {} + +bool RandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "RandomSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr RandomSamplerObj::Build() { + bool reshuffle_each_epoch = true; + auto sampler = std::make_shared(num_samples_, replacement_, reshuffle_each_epoch); + return sampler; +} + +// SequentialSampler +SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples) + : start_index_(start_index), num_samples_(num_samples) {} + +bool SequentialSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "SequentialSampler: invalid num_samples: " << num_samples_; + return false; + } + + if (start_index_ < 0) { + MS_LOG(ERROR) << "SequentialSampler: invalid start_index: " << start_index_; + return false; + } + + return true; +} + +std::shared_ptr SequentialSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, start_index_); + return sampler; +} + +// SubsetRandomSampler +SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector &indices, int64_t num_samples) + : indices_(indices), num_samples_(num_samples) {} + +bool SubsetRandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "SubsetRandomSampler: invalid num_samples: " << num_samples_; + return false; + } + + return true; +} + +std::shared_ptr SubsetRandomSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, indices_); + return sampler; +} + +// WeightedRandomSampler +WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector &weights, int64_t num_samples, + bool replacement) + : weights_(weights), num_samples_(num_samples), replacement_(replacement) {} + +bool WeightedRandomSamplerObj::ValidateParams() { + if (num_samples_ < 0) { + MS_LOG(ERROR) << "WeightedRandomSampler: invalid num_samples: " << num_samples_; + return false; + } + return true; +} + +std::shared_ptr WeightedRandomSamplerObj::Build() { + auto sampler = std::make_shared(num_samples_, weights_, replacement_); + return sampler; +} + +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/api/transforms.cc b/mindspore/ccsrc/dataset/api/transforms.cc new file mode 100644 index 00000000000..e0868374475 --- /dev/null +++ b/mindspore/ccsrc/dataset/api/transforms.cc @@ -0,0 +1,491 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dataset/include/transforms.h" +#include "dataset/kernels/image/image_utils.h" +#include "dataset/kernels/image/normalize_op.h" +#include "dataset/kernels/image/decode_op.h" +#include "dataset/kernels/image/resize_op.h" +#include "dataset/kernels/image/random_crop_op.h" +#include "dataset/kernels/image/center_crop_op.h" +#include "dataset/kernels/image/uniform_aug_op.h" +#include "dataset/kernels/image/random_horizontal_flip_op.h" +#include "dataset/kernels/image/random_vertical_flip_op.h" +#include "dataset/kernels/image/random_rotation_op.h" +#include "dataset/kernels/image/cut_out_op.h" +#include "dataset/kernels/image/random_color_adjust_op.h" +#include "dataset/kernels/image/pad_op.h" + +namespace mindspore { +namespace dataset { +namespace api { + +TensorOperation::TensorOperation() {} + +// Transform operations for computer vision. +namespace vision { + +// Function to create NormalizeOperation. +std::shared_ptr Normalize(std::vector mean, std::vector std) { + auto op = std::make_shared(mean, std); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create DecodeOperation. +std::shared_ptr Decode(bool rgb) { + auto op = std::make_shared(rgb); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create ResizeOperation. +std::shared_ptr Resize(std::vector size, InterpolationMode interpolation) { + auto op = std::make_shared(size, interpolation); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomCropOperation. +std::shared_ptr RandomCrop(std::vector size, std::vector padding, + bool pad_if_needed, std::vector fill_value) { + auto op = std::make_shared(size, padding, pad_if_needed, fill_value); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create CenterCropOperation. +std::shared_ptr CenterCrop(std::vector size) { + auto op = std::make_shared(size); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create UniformAugOperation. +std::shared_ptr UniformAugment(std::vector> operations, + int32_t num_ops) { + auto op = std::make_shared(operations, num_ops); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomHorizontalFlipOperation. +std::shared_ptr RandomHorizontalFlip(float prob) { + auto op = std::make_shared(prob); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomVerticalFlipOperation. +std::shared_ptr RandomVerticalFlip(float prob) { + auto op = std::make_shared(prob); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomRotationOperation. +std::shared_ptr RandomRotation(std::vector degrees, InterpolationMode resample, + bool expand, std::vector center, + std::vector fill_value) { + auto op = std::make_shared(degrees, resample, expand, center, fill_value); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create PadOperation. +std::shared_ptr Pad(std::vector padding, std::vector fill_value, + BorderType padding_mode) { + auto op = std::make_shared(padding, fill_value, padding_mode); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create CutOutOp. +std::shared_ptr CutOut(int32_t length, int32_t num_patches) { + auto op = std::make_shared(length, num_patches); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +// Function to create RandomColorAdjustOperation. +std::shared_ptr RandomColorAdjust(std::vector brightness, + std::vector contrast, + std::vector saturation, std::vector hue) { + auto op = std::make_shared(brightness, contrast, saturation, hue); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + +/* ####################################### Derived TensorOperation classes ################################# */ + +// NormalizeOperation +NormalizeOperation::NormalizeOperation(std::vector mean, std::vector std) : mean_(mean), std_(std) {} + +bool NormalizeOperation::ValidateParams() { + if (mean_.size() != 3) { + MS_LOG(ERROR) << "Normalize: mean vector has incorrect size: " << mean_.size(); + return false; + } + + if (std_.size() != 3) { + MS_LOG(ERROR) << "Normalize: std vector has incorrect size: " << std_.size(); + return false; + } + + return true; +} + +std::shared_ptr NormalizeOperation::Build() { + return std::make_shared(mean_[0], mean_[1], mean_[2], std_[0], std_[1], std_[2]); +} + +// DecodeOperation +DecodeOperation::DecodeOperation(bool rgb) : rgb_(rgb) {} + +bool DecodeOperation::ValidateParams() { return true; } + +std::shared_ptr DecodeOperation::Build() { return std::make_shared(rgb_); } + +// ResizeOperation +ResizeOperation::ResizeOperation(std::vector size, InterpolationMode interpolation) + : size_(size), interpolation_(interpolation) {} + +bool ResizeOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "Resize: size vector has incorrect size: " << size_.size(); + return false; + } + return true; +} + +std::shared_ptr ResizeOperation::Build() { + int32_t height = size_[0]; + int32_t width = 0; + + // User specified the width value. + if (size_.size() == 2) { + width = size_[1]; + } + + return std::make_shared(height, width, interpolation_); +} + +// RandomCropOperation +RandomCropOperation::RandomCropOperation(std::vector size, std::vector padding, bool pad_if_needed, + std::vector fill_value) + : size_(size), padding_(padding), pad_if_needed_(pad_if_needed), fill_value_(fill_value) {} + +bool RandomCropOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "RandomCrop: size vector has incorrect size: " << size_.size(); + return false; + } + + if (padding_.empty() || padding_.size() != 4) { + MS_LOG(ERROR) << "RandomCrop: padding vector has incorrect size: padding.size()"; + return false; + } + + if (fill_value_.empty() || fill_value_.size() != 3) { + MS_LOG(ERROR) << "RandomCrop: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr RandomCropOperation::Build() { + int32_t crop_height = size_[0]; + int32_t crop_width = 0; + + int32_t pad_top = padding_[0]; + int32_t pad_bottom = padding_[1]; + int32_t pad_left = padding_[2]; + int32_t pad_right = padding_[3]; + + uint8_t fill_r = fill_value_[0]; + uint8_t fill_g = fill_value_[1]; + uint8_t fill_b = fill_value_[2]; + + // User has specified the crop_width value. + if (size_.size() == 2) { + crop_width = size_[1]; + } + + auto tensor_op = std::make_shared(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, + BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b); + return tensor_op; +} + +// CenterCropOperation +CenterCropOperation::CenterCropOperation(std::vector size) : size_(size) {} + +bool CenterCropOperation::ValidateParams() { + if (size_.empty() || size_.size() > 2) { + MS_LOG(ERROR) << "CenterCrop: size vector has incorrect size."; + return false; + } + return true; +} + +std::shared_ptr CenterCropOperation::Build() { + int32_t crop_height = size_[0]; + int32_t crop_width = 0; + + // User has specified crop_width. + if (size_.size() == 2) { + crop_width = size_[1]; + } + + std::shared_ptr tensor_op = std::make_shared(crop_height, crop_width); + return tensor_op; +} + +// UniformAugOperation +UniformAugOperation::UniformAugOperation(std::vector> operations, int32_t num_ops) + : operations_(operations), num_ops_(num_ops) {} + +bool UniformAugOperation::ValidateParams() { return true; } + +std::shared_ptr UniformAugOperation::Build() { + std::vector> tensor_ops; + (void)std::transform(operations_.begin(), operations_.end(), std::back_inserter(tensor_ops), + [](std::shared_ptr op) -> std::shared_ptr { return op->Build(); }); + std::shared_ptr tensor_op = std::make_shared(tensor_ops, num_ops_); + return tensor_op; +} + +// RandomHorizontalFlipOperation +RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability) : probability_(probability) {} + +bool RandomHorizontalFlipOperation::ValidateParams() { return true; } + +std::shared_ptr RandomHorizontalFlipOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(probability_); + return tensor_op; +} + +// RandomVerticalFlipOperation +RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability) : probability_(probability) {} + +bool RandomVerticalFlipOperation::ValidateParams() { return true; } + +std::shared_ptr RandomVerticalFlipOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(probability_); + return tensor_op; +} + +// Function to create RandomRotationOperation. +RandomRotationOperation::RandomRotationOperation(std::vector degrees, InterpolationMode interpolation_mode, + bool expand, std::vector center, + std::vector fill_value) + : degrees_(degrees), + interpolation_mode_(interpolation_mode), + expand_(expand), + center_(center), + fill_value_(fill_value) {} + +bool RandomRotationOperation::ValidateParams() { + if (degrees_.empty() || degrees_.size() != 2) { + MS_LOG(ERROR) << "RandomRotation: degrees vector has incorrect size: degrees.size()"; + return false; + } + if (center_.empty() || center_.size() != 2) { + MS_LOG(ERROR) << "RandomRotation: center vector has incorrect size: center.size()"; + return false; + } + if (fill_value_.empty() || fill_value_.size() != 3) { + MS_LOG(ERROR) << "RandomRotation: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr RandomRotationOperation::Build() { + std::shared_ptr tensor_op = + std::make_shared(degrees_[0], degrees_[1], center_[0], center_[1], interpolation_mode_, expand_, + fill_value_[0], fill_value_[1], fill_value_[2]); + return tensor_op; +} + +// PadOperation +PadOperation::PadOperation(std::vector padding, std::vector fill_value, BorderType padding_mode) + : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {} + +bool PadOperation::ValidateParams() { + if (padding_.empty() || padding_.size() == 3 || padding_.size() > 4) { + MS_LOG(ERROR) << "Pad: padding vector has incorrect size: padding.size()"; + return false; + } + + if (fill_value_.empty() || (fill_value_.size() != 1 && fill_value_.size() != 3)) { + MS_LOG(ERROR) << "Pad: fill_value vector has incorrect size: fill_value.size()"; + return false; + } + return true; +} + +std::shared_ptr PadOperation::Build() { + int32_t pad_top, pad_bottom, pad_left, pad_right; + switch (padding_.size()) { + case 1: + pad_left = padding_[0]; + pad_top = padding_[0]; + pad_right = padding_[0]; + pad_bottom = padding_[0]; + break; + case 2: + pad_left = padding_[0]; + pad_top = padding_[1]; + pad_right = padding_[0]; + pad_bottom = padding_[1]; + break; + default: + pad_left = padding_[0]; + pad_top = padding_[1]; + pad_right = padding_[2]; + pad_bottom = padding_[3]; + } + uint8_t fill_r, fill_g, fill_b; + + fill_r = fill_value_[0]; + fill_g = fill_value_[0]; + fill_b = fill_value_[0]; + + if (fill_value_.size() == 3) { + fill_r = fill_value_[0]; + fill_g = fill_value_[1]; + fill_b = fill_value_[2]; + } + + std::shared_ptr tensor_op = + std::make_shared(pad_top, pad_bottom, pad_left, pad_right, padding_mode_, fill_r, fill_g, fill_b); + return tensor_op; +} + +// CutOutOperation +CutOutOperation::CutOutOperation(int32_t length, int32_t num_patches) : length_(length), num_patches_(num_patches) {} + +bool CutOutOperation::ValidateParams() { + if (length_ < 0) { + MS_LOG(ERROR) << "CutOut: length cannot be negative"; + return false; + } + if (num_patches_ < 0) { + MS_LOG(ERROR) << "CutOut: number of patches cannot be negative"; + return false; + } + return true; +} + +std::shared_ptr CutOutOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(length_, length_, num_patches_, false, 0, 0, 0); + return tensor_op; +} + +// RandomColorAdjustOperation. +RandomColorAdjustOperation::RandomColorAdjustOperation(std::vector brightness, std::vector contrast, + std::vector saturation, std::vector hue) + : brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {} + +bool RandomColorAdjustOperation::ValidateParams() { + // Do some input validation. + if (brightness_.empty() || brightness_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: brightness must be a vector of one or two values"; + return false; + } + if (contrast_.empty() || contrast_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: contrast must be a vector of one or two values"; + return false; + } + if (saturation_.empty() || saturation_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: saturation must be a vector of one or two values"; + return false; + } + if (hue_.empty() || hue_.size() > 2) { + MS_LOG(ERROR) << "RandomColorAdjust: hue must be a vector of one or two values"; + return false; + } + return true; +} + +std::shared_ptr RandomColorAdjustOperation::Build() { + float brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub; + + brightness_lb = brightness_[0]; + brightness_ub = brightness_[0]; + + if (brightness_.size() == 2) brightness_ub = brightness_[1]; + + contrast_lb = contrast_[0]; + contrast_ub = contrast_[0]; + + if (contrast_.size() == 2) contrast_ub = contrast_[1]; + + saturation_lb = saturation_[0]; + saturation_ub = saturation_[0]; + + if (saturation_.size() == 2) saturation_ub = saturation_[1]; + + hue_lb = hue_[0]; + hue_ub = hue_[0]; + + if (hue_.size() == 2) hue_ub = hue_[1]; + + std::shared_ptr tensor_op = std::make_shared( + brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub); + return tensor_op; +} + +} // namespace vision +} // namespace api +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/CMakeLists.txt b/mindspore/ccsrc/dataset/core/CMakeLists.txt index 27b9f0e13b8..bfe6e675631 100644 --- a/mindspore/ccsrc/dataset/core/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/core/CMakeLists.txt @@ -1,10 +1,6 @@ -ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) -ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(core OBJECT - ${EXAMPLE_SRCS} - ${FEATURE_SRCS} +set(DATASET_CORE_SRC_FILES client.cc config_manager.cc cv_tensor.cc @@ -13,6 +9,13 @@ add_library(core OBJECT tensor.cc tensor_row.cc tensor_shape.cc - ) +) + +ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto) +ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto) +add_library(core OBJECT ${DATASET_CORE_SRC_FILES} ${EXAMPLE_SRCS} ${FEATURE_SRCS}) add_dependencies(core mindspore::protobuf) -target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) + +if (ENABLE_PYTHON) + target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS}) +endif() diff --git a/mindspore/ccsrc/dataset/core/client.h b/mindspore/ccsrc/dataset/core/client.h index a10cb4596ea..96553c9169d 100644 --- a/mindspore/ccsrc/dataset/core/client.h +++ b/mindspore/ccsrc/dataset/core/client.h @@ -25,21 +25,25 @@ #include "dataset/core/tensor_shape.h" #include "dataset/engine/data_schema.h" #include "dataset/engine/dataset_iterator.h" +#include "dataset/engine/datasetops/source/mindrecord_op.h" +#include "dataset/engine/datasetops/source/tf_reader_op.h" + +#ifdef ENABLE_PYTHON #include "dataset/engine/datasetops/barrier_op.h" -#include "dataset/engine/datasetops/batch_op.h" +#include "dataset/engine/datasetops/filter_op.h" +#include "dataset/engine/datasetops/source/generator_op.h" #include "dataset/engine/datasetops/build_vocab_op.h" +#endif + +#include "dataset/engine/datasetops/batch_op.h" #include "dataset/engine/datasetops/dataset_op.h" #include "dataset/engine/datasetops/device_queue_op.h" #include "dataset/engine/datasetops/map_op.h" #include "dataset/engine/datasetops/project_op.h" #include "dataset/engine/datasetops/rename_op.h" -#include "dataset/engine/datasetops/filter_op.h" #include "dataset/engine/datasetops/repeat_op.h" #include "dataset/engine/datasetops/skip_op.h" #include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/datasetops/source/generator_op.h" -#include "dataset/engine/datasetops/source/mindrecord_op.h" -#include "dataset/engine/datasetops/source/tf_reader_op.h" #include "dataset/engine/datasetops/take_op.h" #include "dataset/engine/datasetops/zip_op.h" #include "dataset/engine/datasetops/concat_op.h" diff --git a/mindspore/ccsrc/dataset/core/constants.h b/mindspore/ccsrc/dataset/core/constants.h index 34d2f2583c1..c85ef52bf5f 100644 --- a/mindspore/ccsrc/dataset/core/constants.h +++ b/mindspore/ccsrc/dataset/core/constants.h @@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf }; // Possible flavours of Tensor implementations enum class TensorImpl { kNone, kFlexible, kCv, kNP }; +// Possible values for Border types +enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; + +// Possible interpolation modes +enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; + // convenience functions for 32bit int bitmask inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; } diff --git a/mindspore/ccsrc/dataset/core/data_type.cc b/mindspore/ccsrc/dataset/core/data_type.cc index bb10fae52f5..dd97c10bae6 100644 --- a/mindspore/ccsrc/dataset/core/data_type.cc +++ b/mindspore/ccsrc/dataset/core/data_type.cc @@ -14,11 +14,12 @@ * limitations under the License. */ #include "dataset/core/data_type.h" +#ifdef ENABLE_PYTHON +#include "dataset/core/pybind_support.h" +#endif #include "utils/log_adapter.h" -#include "dataset/core/pybind_support.h" - namespace mindspore { namespace dataset { @@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const { return 0; } +#ifdef ENABLE_PYTHON py::dtype DataType::AsNumpyType() const { if (type_ < DataType::NUM_OF_TYPES) return py::dtype(kTypeInfo[type_].pybindType_); else return py::dtype("unknown"); } +#endif uint8_t DataType::AsCVType() const { uint8_t res = kCVInvalidType; @@ -112,6 +115,7 @@ std::string DataType::ToString() const { return "unknown"; } +#ifdef ENABLE_PYTHON DataType DataType::FromNpArray(const py::array &arr) { if (py::isinstance>(arr)) { return DataType(DataType::DE_BOOL); @@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const { } return res; } +#endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/core/data_type.h b/mindspore/ccsrc/dataset/core/data_type.h index a487f3300e7..e15b6ed272b 100644 --- a/mindspore/ccsrc/dataset/core/data_type.h +++ b/mindspore/ccsrc/dataset/core/data_type.h @@ -19,14 +19,16 @@ #include #include - +#ifdef ENABLE_PYTHON #include "pybind11/numpy.h" #include "pybind11/pybind11.h" - -#include "dataset/core/constants.h" #include "dataset/core/pybind_support.h" - namespace py = pybind11; +#else +#include "Eigen/Core" +using float16 = Eigen::half; +#endif +#include "dataset/core/constants.h" namespace mindspore { namespace dataset { @@ -59,6 +61,7 @@ class DataType { const uint8_t cvType_; // OpenCv matching type }; +#ifdef ENABLE_PYTHON static inline const TypeInfo kTypeInfo[] = { // name, sizeInBytes, pybindTypem formatDescriptor, openCV {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN @@ -76,19 +79,38 @@ class DataType { {"float64", 8, "double", py::format_descriptor::format(), CV_64F}, // DE_FLOAT64 {"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING }; +#else + static inline const TypeInfo kTypeInfo[] = { + // name, sizeInBytes, pybindTypem formatDescriptor, openCV + {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN + {"bool", 1, "bool", "", CV_8U}, // DE_BOOL + {"int8", 1, "int8", "", CV_8S}, // DE_INT8 + {"uint8", 1, "uint8", "", CV_8U}, // DE_UINT8 + {"int16", 2, "int16", "", CV_16S}, // DE_INT16 + {"uint16", 2, "uint16", "", CV_16U}, // DE_UINT16 + {"int32", 4, "int32", "", CV_32S}, // DE_INT32 + {"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32 + {"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64 + {"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64 + {"float16", 2, "float16", "", CV_16F}, // DE_FLOAT16 + {"float32", 4, "float32", "", CV_32F}, // DE_FLOAT32 + {"float64", 8, "double", "", CV_64F}, // DE_FLOAT64 + {"string", 0, "bytes", "", kCVInvalidType} // DE_STRING + }; +#endif // No arg constructor to create an unknown shape DataType() : type_(DE_UNKNOWN) {} // Create a type from a given string - // @param type_str + /// \param type_str explicit DataType(const std::string &type_str); // Default destructor ~DataType() = default; // Create a type from a given enum - // @param d + /// \param d constexpr explicit DataType(Type d) : type_(d) {} constexpr bool operator==(const DataType a) const { return type_ == a.type_; } @@ -100,49 +122,49 @@ class DataType { constexpr bool operator!=(const Type a) const { return type_ != a; } // Disable this usage `if(d)` where d is of type DataType - // @return + /// \return operator bool() = delete; // To be used in Switch/case - // @return + /// \return operator Type() const { return type_; } // The number of bytes needed to store one value of this type - // @return + /// \return uint8_t SizeInBytes() const; // Convert from DataType to OpenCV type - // @return + /// \return uint8_t AsCVType() const; // Convert from OpenCV type to DataType - // @param cv_type - // @return + /// \param cv_type + /// \return static DataType FromCVType(int cv_type); // Returns a string representation of the type - // @return + /// \return std::string ToString() const; // returns true if the template type is the same as the Tensor type_ - // @tparam T - // @return true or false + /// \tparam T + /// \return true or false template bool IsCompatible() const { return type_ == FromCType(); } // returns true if the template type is the same as the Tensor type_ - // @tparam T - // @return true or false + /// \tparam T + /// \return true or false template bool IsLooselyCompatible() const; // << Stream output operator overload - // @notes This allows you to print the info using stream operators - // @param out - reference to the output stream being overloaded - // @param rO - reference to the DataType to display - // @return - the output stream must be returned + /// \notes This allows you to print the info using stream operators + /// \param out - reference to the output stream being overloaded + /// \param rO - reference to the DataType to display + /// \return - the output stream must be returned friend std::ostream &operator<<(std::ostream &out, const DataType &so) { out << so.ToString(); return out; @@ -151,22 +173,24 @@ class DataType { template static DataType FromCType(); +#ifdef ENABLE_PYTHON // Convert from DataType to Pybind type - // @return + /// \return py::dtype AsNumpyType() const; // Convert from NP type to DataType - // @param type - // @return + /// \param type + /// \return static DataType FromNpType(const py::dtype &type); // Convert from NP array to DataType - // @param py array - // @return + /// \param py array + /// \return static DataType FromNpArray(const py::array &arr); +#endif // Get the buffer string format of the current type. Used in pybind buffer protocol. - // @return + /// \return std::string GetPybindFormat() const; bool IsSignedInt() const { diff --git a/mindspore/ccsrc/dataset/core/tensor.cc b/mindspore/ccsrc/dataset/core/tensor.cc index ce5aaa5d658..eda52398525 100644 --- a/mindspore/ccsrc/dataset/core/tensor.cc +++ b/mindspore/ccsrc/dataset/core/tensor.cc @@ -28,10 +28,12 @@ #include "dataset/core/constants.h" #include "dataset/core/cv_tensor.h" #include "dataset/core/global_context.h" +#ifdef ENABLE_PYTHON #include "dataset/core/pybind_support.h" +namespace py = pybind11; +#endif #include "dataset/core/tensor_shape.h" -namespace py = pybind11; namespace mindspore { namespace dataset { // Helper macros for printing tensor elements @@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector &strings, const TensorShape &shape MS_ASSERT(num_bytes == 0); if (shape.known()) Tensor::Reshape(shape); } + Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape) : Tensor(TensorShape({static_cast(bytes_list.value_size())}), DataType(DataType::DE_STRING)) { // total bytes needed = offset array + strings @@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape MS_ASSERT(num_bytes == 0); if (shape.known()) Tensor::Reshape(shape); } + Status Tensor::CreateTensor(std::shared_ptr *ptr, TensorImpl tensor_impl, const TensorShape &shape, DataType type, const unsigned char *data) { if (!shape.known()) { @@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr *ptr, TensorImpl tensor_impl return Status::OK(); // returns base-class shared_ptr } +#ifdef ENABLE_PYTHON Status Tensor::CreateTensorFromNumpyString(std::shared_ptr *ptr, py::array arr) { std::vector shape; for (dsize_t i = 0; i < arr.ndim(); i++) { @@ -297,6 +302,7 @@ Status Tensor::CreateTensor(std::shared_ptr *ptr, py::array arr) { return Status::OK(); // returns base-class shared_ptr } +#endif Status Tensor::CreateTensor(std::shared_ptr *ptr, const std::vector &strings, const TensorShape &shape) { @@ -698,21 +704,24 @@ std::vector Tensor::Strides() { return strides; } -Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) { - CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); +#ifdef ENABLE_PYTHON +Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { + RETURN_UNEXPECTED_IF_NULL(t); + CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); - std::string format_desc = t.type().GetPybindFormat(); + std::string format_desc = t->type().GetPybindFormat(); if (format_desc.empty()) { RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format"); } - *out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */ - t.type().SizeInBytes(), /* Size of one scalar */ - format_desc, /* Python struct-style format descriptor */ - t.Rank(), /* Number of dimensions */ - t.shape().AsVector(), /* Buffer dimensions */ - t.Strides()); + *out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */ + t->type().SizeInBytes(), /* Size of one scalar */ + format_desc, /* Python struct-style format descriptor */ + t->Rank(), /* Number of dimensions */ + t->shape().AsVector(), /* Buffer dimensions */ + t->Strides()); return Status::OK(); } +#endif template Status Tensor::GetItemAt(T *o, const std::vector &index) const { @@ -752,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector &index) o->swap(sv); return Status::OK(); } + +#ifdef ENABLE_PYTHON // return data as numpy, should return status Status Tensor::GetDataAsNumpy(py::array *data) { RETURN_UNEXPECTED_IF_NULL(data_); @@ -815,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { data_allocator_->deallocate(reinterpret_cast(tmp_data)); return Status::OK(); } +#endif void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } diff --git a/mindspore/ccsrc/dataset/core/tensor.h b/mindspore/ccsrc/dataset/core/tensor.h index 899098faaf6..337535a2c33 100644 --- a/mindspore/ccsrc/dataset/core/tensor.h +++ b/mindspore/ccsrc/dataset/core/tensor.h @@ -26,20 +26,27 @@ #undef HAVE_STDDEF_H #undef HAVE_STDLIB_H #endif + +#ifdef ENABLE_PYTHON #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" +#endif + #include "dataset/core/constants.h" #include "dataset/core/data_type.h" #include "dataset/core/tensor_shape.h" -#include "dataset/util/allocator.h" #include "dataset/util/status.h" #include "proto/example.pb.h" +#ifdef ENABLE_PYTHON namespace py = pybind11; +#endif namespace mindspore { namespace dataset { class Tensor; +template +class Allocator; using CharAllocPtr = std::unique_ptr>; using TensorAllocPtr = std::shared_ptr>; // An allocator shared_ptr for Tensors @@ -114,16 +121,17 @@ class Tensor { static Status CreateTensor(std::shared_ptr *, TensorImpl tensor_impl, const TensorShape &shape, DataType type, const unsigned char *data = nullptr); - /// Create a copy of the input tensor - /// \param out [out] output tensor to be generated - /// \param in [in] orginal tensor to be copied - /// \return Status + // Create a copy of the input tensor + // @param out [out] output tensor to be generated + // @param in [in] orginal tensor to be copied + // @return Status static Status CreateTensor(std::shared_ptr *out, const std::shared_ptr &in) { const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes()); return Status::OK(); } +#ifdef ENABLE_PYTHON // A static factory method to create a Tensor from a given py::array. // @param ptr output argument to hold the created Tensor // @param arr py::array @@ -132,6 +140,7 @@ class Tensor { // Helper function to create a tensor from Numpy of strings static Status CreateTensorFromNumpyString(std::shared_ptr *ptr, py::array arr); +#endif // A static factory method to create a Tensor from a given list of strings. // @param ptr output argument to hold the created Tensor @@ -170,6 +179,7 @@ class Tensor { static Status CreateTensor(std::shared_ptr *ptr, const T &item) { return CreateTensor(ptr, {item}, TensorShape::CreateScalar()); } + // Create tensor from protobuf bytelist with uint8 or int8 types static Status CreateTensor(std::shared_ptr *ptr, const dataengine::BytesList &bytes_list, const TensorShape &shape, const DataType &type, dsize_t pad_size); @@ -346,12 +356,12 @@ class Tensor { virtual void Squeeze(); - /// Calculates the strides of the Tensor - /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) - /// The strides will be {6,2,1}. - /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) - /// The strides will be {24,8,4}. - /// @return vector of integers + // Calculates the strides of the Tensor + // Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) + // The strides will be {6,2,1}. + // Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) + // The strides will be {24,8,4}. + // @return vector of integers std::vector Strides(); std::string ToString() { @@ -376,6 +386,7 @@ class Tensor { // Slice string tensors Status SliceString(std::shared_ptr *out, const std::vector &indices); +#ifdef ENABLE_PYTHON // Constructs numpy array from input tensor // @param data this data is the location of python data // @return Status code @@ -383,7 +394,8 @@ class Tensor { Status GetDataAsNumpyStrings(py::array *data); - static Status GetBufferInfo(Tensor &t, py::buffer_info *out); + static Status GetBufferInfo(Tensor *t, py::buffer_info *out); +#endif // Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor Status Concatenate(const std::vector &index, const std::shared_ptr &input); @@ -570,7 +582,7 @@ class Tensor { // Return a TensorIterator that points to the start of the Tensor. // It's the user responsibility to use the correct type that matches the Tensor type - // @tparam T The type of values in the Tensor + // @param T The type of values in the Tensor // @return TensorIterator template TensorIterator begin() { diff --git a/mindspore/ccsrc/dataset/core/tensor_row.cc b/mindspore/ccsrc/dataset/core/tensor_row.cc index 882f6728bfb..930608d1083 100644 --- a/mindspore/ccsrc/dataset/core/tensor_row.cc +++ b/mindspore/ccsrc/dataset/core/tensor_row.cc @@ -18,7 +18,6 @@ #include "dataset/core/tensor_row.h" -namespace py = pybind11; namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.cc b/mindspore/ccsrc/dataset/core/tensor_shape.cc index a0d6b9cd8d7..953b9dfc9f6 100644 --- a/mindspore/ccsrc/dataset/core/tensor_shape.cc +++ b/mindspore/ccsrc/dataset/core/tensor_shape.cc @@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape) known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape. } +#ifdef ENABLE_PYTHON TensorShape::TensorShape(py::list l) : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { std::vector list_c; @@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l) } AddListToShape(list_c); } +#endif TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type) : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) { @@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const { return TensorShape(vec); } +#ifdef ENABLE_PYTHON py::list TensorShape::AsPyList() { py::list list; for (auto i : raw_shape_) { @@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() { } return list; } +#endif TensorShape TensorShape::Squeeze() const { std::vector new_shape; diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.h b/mindspore/ccsrc/dataset/core/tensor_shape.h index c83e43cd7dc..3d2681271ab 100644 --- a/mindspore/ccsrc/dataset/core/tensor_shape.h +++ b/mindspore/ccsrc/dataset/core/tensor_shape.h @@ -24,13 +24,16 @@ #include +#ifdef ENABLE_PYTHON #include "pybind11/pybind11.h" +namespace py = pybind11; +#endif #include "dataset/core/constants.h" +#include "dataset/util/status.h" #include "dataset/core/global_context.h" #include "dataset/util/allocator.h" -namespace py = pybind11; namespace mindspore { namespace dataset { // Class that represents a shape of a Tensor. A shape can be: @@ -43,7 +46,8 @@ namespace dataset { // -# one or more dim is unknown --> not empty vector --> where di is unknown\n // Example: <3,?> (the 1st dim is unknown)\n // <2,?,?,?> (all dims but the 0th dim are unknown) -// TensorShape supports any dim > 0 and < 2^31-1 + +/// \brief TensorShape supports any dim > 0 and < 2^31-1 class TensorShape { public: static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension @@ -51,57 +55,59 @@ class TensorShape { // Force the compiler to not create a no-arg constructor TensorShape() = delete; - // Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). - // If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown - // @param list + /// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). + /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown + /// \param[in] list explicit TensorShape(const std::initializer_list &list); - // Create a Shape from a vector (e.g., TensorShape s = std::vector({2,2}) ). - // If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown - // @param list + /// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector({2,2}) ). + /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown + /// \param[in] list explicit TensorShape(const std::vector &list); - // Copy constructor - // @param shape + /// \brief Copy constructor + /// \param[in] shape TensorShape(const TensorShape &shape); - // construct a TensorShape via a python list - // @param py::list l - a list object from python +#ifdef ENABLE_PYTHON + /// \brief construct a TensorShape via a python list + /// \param[in] py::list l - a list object from python explicit TensorShape(py::list l); +#endif ~TensorShape() = default; - // Create a scalar Shape (i.e., empty shape with mKnown = true) - // @return TensorShape + /// \brief Create a scalar Shape (i.e., empty shape with mKnown = true) + /// \return TensorShape static TensorShape CreateScalar() { return TensorShape({}); } - // Create a shape with an unknown rank. - // @return TensorShape + /// \brief Create a shape with an unknown rank. + /// \return TensorShape static TensorShape CreateUnknownRankShape(); - // Create a shape with a known rank . - // @return TensorShape + /// \brief Create a shape with a known rank . + /// \return TensorShape static TensorShape CreateUnknownShapeWithRank(dsize_t rank); - // Insert a new dim into a copy of the current shape. - // @param dim to be added - // @param axis the index where dim should be added - // @return New modified shape + /// \brief Insert a new dim into a copy of the current shape. + /// \param[in] dim to be added + /// \param[in] axis the index where dim should be added + /// \return New modified shape TensorShape InsertDim(dsize_t axis, dsize_t dim) const; - // Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> - // @param dim - // @return + /// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> + /// \param[in] dim + /// \return TensorShape PrependDim(dsize_t dim) const; - // Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> - // @param dim - // @return + /// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> + /// \param[in] dim + /// \return TensorShape AppendDim(dsize_t dim) const; - // Create a shape based on OpenCV shape and type - // @param cv_size - // @param type int that represent the type in OpenCV, example CV_8U, CV_64S + /// \brief Create a shape based on OpenCV shape and type + /// \param[in] cv_size + /// \param[in] type int that represent the type in OpenCV, example CV_8U, CV_64S TensorShape(cv::MatSize cv_size, uint32_t type); dsize_t Size() const { return raw_shape_.size(); } @@ -123,47 +129,50 @@ class TensorShape { return raw_shape_[index]; } - // Return the Shape as a vector - // @return + /// \brief Return the Shape as a vector + /// \return std::vector AsVector() const; - // Returns the class info as a string - // @return + /// \brief Returns the class info as a string + /// \return std::string ToString() const { std::stringstream ss; ss << *this; return ss.str(); } - // Actual print function used by operator<< - // @param out output string stream + /// \brief Actual print function used by operator<< + /// \param out output string stream void Print(std::ostream &out) const; - // << Stream output operator overload - // @notes This allows you to print the info using stream operators - // @param out - reference to the output stream being overloaded - // @param rO - reference to the TensorShape to display - // @return - the output stream must be returned + /// \brief << Stream output operator overload + /// This allows you to print the info using stream operators + /// \param[in] out - reference to the output stream being overloaded + /// \param[in] rO - reference to the TensorShape to display + /// \return - the output stream must be returned friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) { so.Print(out); return out; } +#ifdef ENABLE_PYTHON py::list AsPyList(); +#endif - // Checks if the given index is a valid index for this tensor. - // For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. - // @param index - // @return bool + /// \brief Checks if the given index is a valid index for this tensor. + /// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. + /// \param[in] index + /// \return bool bool IsValidIndex(const std::vector &index) const; TensorShape Squeeze() const; std::vector Strides() const; - // Returns the location of the item assuming row major memory layout. - // @param index - // @return + /// \brief Returns the location of the item assuming row major memory layout. + /// \param[in] index + /// \param[out] flat_index + /// \return Status ToFlatIndex(const std::vector &index, dsize_t *flat_index) const; private: @@ -174,11 +183,11 @@ class TensorShape { // Vector to keep the strides of the shape. The size is rank+1 std::vector strides_; - // Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape. - // @tparam T list - // @param list Iterable list - // @return true if the shape is valid and no overflow would be generated when counting the number of elements. - // False otherwise. + /// \brief Internal utility function to iterate over a list, + /// check if the dim is valid and then insert it into the shape. + /// \param[in] list Iterable list + /// \return true if the shape is valid and no overflow would be generated when counting the number of elements. + /// False otherwise. template void AddListToShape(const T &list); }; diff --git a/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt index ed574210304..2dbdb82d26b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/datasetops/CMakeLists.txt @@ -2,13 +2,12 @@ add_subdirectory(source) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops OBJECT + +set(DATASET_ENGINE_DATASETOPS_SRC_FILES dataset_op.cc parallel_op.cc pipeline_op.cc - barrier_op.cc batch_op.cc - bucket_batch_by_length_op.cc device_queue_op.cc map_op.cc project_op.cc @@ -18,8 +17,18 @@ add_library(engine-datasetops OBJECT take_op.cc shuffle_op.cc zip_op.cc - concat_op.cc - filter_op.cc - build_vocab_op.cc + concat_op.cc ) +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SRC_FILES} + bucket_batch_by_length_op.cc + barrier_op.cc + filter_op.cc + build_vocab_op.cc + ) +endif() + +add_library(engine-datasetops OBJECT ${DATASET_ENGINE_DATASETOPS_SRC_FILES}) + diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc index 8bfa8c287c0..93b48640405 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc @@ -19,7 +19,9 @@ #include #include "common/utils.h" +#ifdef ENABLE_PYTHON #include "dataset/core/pybind_support.h" +#endif #include "dataset/engine/data_buffer.h" #include "dataset/engine/db_connector.h" #include "dataset/engine/opt/pass.h" @@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa Status BatchOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); +#ifdef ENABLE_PYTHON *ptr = std::make_shared(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_, builder_batch_map_func_, builder_pad_map_); +#else + *ptr = std::make_shared(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, + builder_num_workers_, builder_cols_to_map_, builder_pad_map_); +#endif return Status::OK(); } @@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() { return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err)); } +#ifdef ENABLE_PYTHON BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, const std::vector &cols_to_map, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map) @@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, pad_info_(pad_map) { worker_queues_.Init(num_workers, op_queue_size); } +#else +BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, + const std::vector &cols_to_map, PadInfo pad_map) + : ParallelOp(num_workers, op_queue_size), + start_batch_size_(batch_size), + drop_(drop), + pad_(pad), + pyfunc_column_names_(cols_to_map), + pad_info_(pad_map) { + worker_queues_.Init(num_workers, op_queue_size); +} +#endif Status BatchOp::operator()() { Status rc = LaunchThreadsAndInitOp(); @@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) { Status BatchOp::MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, std::unique_ptr *db) { RETURN_UNEXPECTED_IF_NULL(table_pair.first); - if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc +#ifdef ENABLE_PYTHON + if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc +#endif if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed (*db) = std::make_unique(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone); std::unique_ptr dest_table = std::make_unique(); @@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) { return Status::OK(); } +#ifdef ENABLE_PYTHON Status BatchOp::MapColumns(std::pair, CBatchInfo> *table_pair) { TensorBatchTable input_table; input_table.reserve(pyfunc_column_names_.size()); @@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair, CBatchInfo> } return Status::OK(); } +#endif Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { +#ifdef ENABLE_PYTHON if (batch_size_func_ != nullptr) { RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); } else { (*batch_size) = start_batch_size_; } +#else + (*batch_size) = start_batch_size_; +#endif return Status::OK(); } +#ifdef ENABLE_PYTHON Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { { // Acquire Python GIL @@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou } return Status(StatusCode::kOK); } +#endif Status BatchOp::PadColumns(std::unique_ptr *table, const PadInfo &pad_info, const std::unordered_map &column_name_id_map) { diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h index 28df5e7e817..acf2e5a0c02 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h @@ -89,6 +89,7 @@ class BatchOp : public ParallelOp { return *this; } +#ifdef ENABLE_PYTHON // set columns to perform map on // @param const std::vector & cols_to_map - name of columns to perform map on // @return Builder & reference to builder class object @@ -104,6 +105,7 @@ class BatchOp : public ParallelOp { builder_batch_size_func_ = batch_size_func; return *this; } +#endif // @param std::shared_ptr *ptr pointer to shared_ptr, actual return arg // @return Status - The error code return @@ -121,8 +123,10 @@ class BatchOp : public ParallelOp { int32_t builder_op_connector_size_; std::vector builder_cols_to_map_; PadInfo builder_pad_map_; +#ifdef ENABLE_PYTHON py::function builder_batch_size_func_; py::function builder_batch_map_func_; +#endif }; enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 }; @@ -144,6 +148,7 @@ class BatchOp : public ParallelOp { const int64_t get_epoch_num() const { return epoch_num_; } }; +#ifdef ENABLE_PYTHON // BatchOp constructor // @param int32_t batch_size // @param bool drop @@ -152,6 +157,10 @@ class BatchOp : public ParallelOp { // @param int32_t num_workers BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, const std::vector &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map); +#else + BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers, + const std::vector &, PadInfo pad_map); +#endif // BatchOp destructor ~BatchOp() {} @@ -219,10 +228,13 @@ class BatchOp : public ParallelOp { // @return Status - The error code return Status MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, std::unique_ptr *db); + +#ifdef ENABLE_PYTHON // Function that calls pyfunc to perform map on batch // @param (std::pair, batch_stats> *table_pair - contains un-batched tensor // @return Status - The error code return Status MapColumns(std::pair, CBatchInfo> *table_pair); +#endif // @param const PadInfo &pad_info pad info to unpack // @param const std::unordered_map& column_name_id_map - column names to index mapping @@ -247,6 +259,7 @@ class BatchOp : public ParallelOp { // @return Status - The error code return Status LaunchThreadsAndInitOp(); +#ifdef ENABLE_PYTHON // Invoke batch size function with current BatchInfo to generate batch size. // @return Status - The error code return Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info); @@ -254,6 +267,7 @@ class BatchOp : public ParallelOp { // Invoke batch map function with current BatchInfo to generate tensors to batch. // @return Status - The error code return Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info); +#endif int32_t start_batch_size_; bool drop_; // bool for whether to drop remainder or not @@ -262,8 +276,10 @@ class BatchOp : public ParallelOp { PadInfo pad_info_; // column names to perform padding on std::unique_ptr child_iterator_; // child iterator for fetching TensorRows 1 by 1 QueueList, CBatchInfo>> worker_queues_; // internal queue for syncing worker +#ifdef ENABLE_PYTHON py::function batch_size_func_; // Function pointer of batch size function py::function batch_map_func_; // Function pointer of per batch map function +#endif }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt index b78ddcd87bc..389e3f5af6d 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt @@ -1,19 +1,32 @@ add_subdirectory(sampler) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops-source OBJECT - generator_op.cc + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES io_block.cc - mindrecord_op.cc - tf_reader_op.cc image_folder_op.cc mnist_op.cc - voc_op.cc coco_op.cc - manifest_op.cc cifar_op.cc random_data_op.cc celeba_op.cc text_file_op.cc clue_op.cc - ) \ No newline at end of file + ) + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} + mindrecord_op.cc + tf_reader_op.cc + ) + +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES} + generator_op.cc + voc_op.cc + manifest_op.cc + ) +endif() + +add_library(engine-datasetops-source OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}) \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt index 5209d9ba4ad..1335d987e8b 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/CMakeLists.txt @@ -1,12 +1,21 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(engine-datasetops-source-sampler OBJECT + +set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES distributed_sampler.cc pk_sampler.cc - python_sampler.cc random_sampler.cc sampler.cc sequential_sampler.cc subset_random_sampler.cc weighted_random_sampler.cc ) + +if (ENABLE_PYTHON) + set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES + ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} + python_sampler.cc + ) +endif() + +add_library(engine-datasetops-source-sampler OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}) diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc index 1584166dc31..5f0ffd88558 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc @@ -89,6 +89,7 @@ void Sampler::Print(std::ostream &out, bool show_all) const { } } +#ifdef ENABLE_PYTHON Status Sampler::GetAllIdsThenReset(py::array *data) { std::unique_ptr db; std::shared_ptr sample_ids; @@ -120,6 +121,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) { RETURN_IF_NOT_OK(ResetSampler()); return Status::OK(); } +#endif Status Sampler::SetNumSamples(int64_t num_samples) { CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative"); diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h index 34c3cb79358..d9da777a48c 100644 --- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h +++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h @@ -74,8 +74,11 @@ class Sampler { // @return - The error code return virtual Status GetNextSample(std::unique_ptr *out_buffer) = 0; +// This function only called by python layer. Not needed by Android. +#ifdef ENABLE_PYTHON // return all ids in one epoch as a numpy array, then call reset Status GetAllIdsThenReset(py::array *data); +#endif // for next epoch of sampleIds // @return - The error code return @@ -155,5 +158,4 @@ class Sampler { }; } // namespace dataset } // namespace mindspore - #endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_ diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph.cc b/mindspore/ccsrc/dataset/engine/gnn/graph.cc index b3a8aed8f58..8603fe6c9fb 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph.cc +++ b/mindspore/ccsrc/dataset/engine/gnn/graph.cc @@ -387,6 +387,7 @@ Status Graph::GetMetaInfo(MetaInfo *meta_info) { return Status::OK(); } +#ifdef ENABLE_PYTHON Status Graph::GraphInfo(py::dict *out) { MetaInfo meta_info; RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); @@ -398,6 +399,7 @@ Status Graph::GraphInfo(py::dict *out) { (*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type); return Status::OK(); } +#endif Status Graph::LoadNodeAndEdge() { GraphLoader gl(dataset_file_, num_workers_); diff --git a/mindspore/ccsrc/dataset/engine/gnn/graph.h b/mindspore/ccsrc/dataset/engine/gnn/graph.h index 68bdfcc9dc9..40b6d0b6d49 100644 --- a/mindspore/ccsrc/dataset/engine/gnn/graph.h +++ b/mindspore/ccsrc/dataset/engine/gnn/graph.h @@ -140,8 +140,10 @@ class Graph { // @return Status - The error code return Status GetMetaInfo(MetaInfo *meta_info); +#ifdef ENABLE_PYTHON // Return meta information to python layer Status GraphInfo(py::dict *out); +#endif Status Init(); diff --git a/mindspore/ccsrc/dataset/engine/opt/pass.cc b/mindspore/ccsrc/dataset/engine/opt/pass.cc index 27769f056b8..aa33e59b8ff 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pass.cc +++ b/mindspore/ccsrc/dataset/engine/opt/pass.cc @@ -21,13 +21,15 @@ #include "dataset/engine/datasetops/map_op.h" #include "dataset/engine/datasetops/project_op.h" #include "dataset/engine/datasetops/rename_op.h" -#include "dataset/engine/datasetops/filter_op.h" #include "dataset/engine/datasetops/repeat_op.h" #include "dataset/engine/datasetops/skip_op.h" #include "dataset/engine/datasetops/shuffle_op.h" -#include "dataset/engine/datasetops/source/generator_op.h" #include "dataset/engine/datasetops/source/mindrecord_op.h" #include "dataset/engine/datasetops/source/tf_reader_op.h" +#ifdef ENABLE_PYTHON +#include "dataset/engine/datasetops/filter_op.h" +#include "dataset/engine/datasetops/source/generator_op.h" +#endif #include "dataset/engine/datasetops/source/image_folder_op.h" #include "dataset/engine/datasetops/take_op.h" #include "dataset/engine/datasetops/zip_op.h" @@ -111,11 +113,6 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { - // Fallback to base class visitor by default - return RunOnNode(std::static_pointer_cast(node), modified); -} - Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); @@ -126,11 +123,6 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { return RunOnNode(std::static_pointer_cast(node), modified); } -Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { - // Fallback to base class visitor by default - return RunOnNode(std::static_pointer_cast(node), modified); -} - Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); @@ -141,6 +133,18 @@ Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { return RunOnNode(std::static_pointer_cast(node), modified); } +#ifdef ENABLE_PYTHON +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} + +Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { + // Fallback to base class visitor by default + return RunOnNode(std::static_pointer_cast(node), modified); +} +#endif + Status NodePass::RunOnNode(std::shared_ptr node, bool *modified) { // Fallback to base class visitor by default return RunOnNode(std::static_pointer_cast(node), modified); diff --git a/mindspore/ccsrc/dataset/engine/opt/pass.h b/mindspore/ccsrc/dataset/engine/opt/pass.h index 129c2fab37c..dd9b65b283b 100644 --- a/mindspore/ccsrc/dataset/engine/opt/pass.h +++ b/mindspore/ccsrc/dataset/engine/opt/pass.h @@ -33,18 +33,20 @@ class ProjectOp; class RenameOp; -class FilterOp; - class SkipOp; class ShuffleOp; -class GeneratorOp; - class MindRecordOp; class TFReaderOp; +#ifdef ENABLE_PYTHON +class FilterOp; + +class GeneratorOp; +#endif + class TakeOp; class ZipOp; @@ -122,18 +124,20 @@ class NodePass : public Pass { virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); - virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); +#ifdef ENABLE_PYTHON + virtual Status RunOnNode(std::shared_ptr node, bool *modified); + + virtual Status RunOnNode(std::shared_ptr node, bool *modified); +#endif + virtual Status RunOnNode(std::shared_ptr node, bool *modified); virtual Status RunOnNode(std::shared_ptr node, bool *modified); diff --git a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc b/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc index 852bc018b20..305c3ce121c 100644 --- a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc +++ b/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.cc @@ -50,12 +50,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { return Status::OK(); } -Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { - *modified = false; - std::cout << "Visiting FilterOp" << '\n'; - return Status::OK(); -} - Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; std::cout << "Visiting SkipOp" << '\n'; @@ -67,11 +61,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { return Status::OK(); } -Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { - *modified = false; - std::cout << "Visiting GeneratorOp" << '\n'; - return Status::OK(); -} Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; std::cout << "Visiting MindRecordOp" << '\n'; @@ -84,6 +73,20 @@ Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) return Status::OK(); } +#ifdef ENABLE_PYTHON +Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + std::cout << "Visiting FilterOp" << '\n'; + return Status::OK(); +} + +Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { + *modified = false; + std::cout << "Visiting GeneratorOp" << '\n'; + return Status::OK(); +} +#endif + Status PrinterPass::RunOnNode(std::shared_ptr node, bool *modified) { *modified = false; std::cout << "Visiting TakeOp" << '\n'; diff --git a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h b/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h index fa04a88277e..2552476ebd2 100644 --- a/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h +++ b/mindspore/ccsrc/dataset/engine/opt/util/printer_pass.h @@ -35,18 +35,20 @@ class PrinterPass : public NodePass { Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; - Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; +#ifdef ENABLE_PYTHON + Status RunOnNode(std::shared_ptr node, bool *modified) override; + + Status RunOnNode(std::shared_ptr node, bool *modified) override; +#endif + Status RunOnNode(std::shared_ptr node, bool *modified) override; Status RunOnNode(std::shared_ptr node, bool *modified) override; diff --git a/mindspore/ccsrc/dataset/include/dataset/core/constants.h b/mindspore/ccsrc/dataset/include/dataset/core/constants.h new file mode 120000 index 00000000000..22fe6d07e1e --- /dev/null +++ b/mindspore/ccsrc/dataset/include/dataset/core/constants.h @@ -0,0 +1 @@ +../../../core/constants.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/include/dataset/core/data_type.h b/mindspore/ccsrc/dataset/include/dataset/core/data_type.h new file mode 120000 index 00000000000..37a0e1b686e --- /dev/null +++ b/mindspore/ccsrc/dataset/include/dataset/core/data_type.h @@ -0,0 +1 @@ +../../../core/data_type.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/include/dataset/core/tensor_shape.h b/mindspore/ccsrc/dataset/include/dataset/core/tensor_shape.h new file mode 120000 index 00000000000..1fb7a24d912 --- /dev/null +++ b/mindspore/ccsrc/dataset/include/dataset/core/tensor_shape.h @@ -0,0 +1 @@ +../../../core/tensor_shape.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/include/dataset/util/status.h b/mindspore/ccsrc/dataset/include/dataset/util/status.h new file mode 120000 index 00000000000..b06279c05b5 --- /dev/null +++ b/mindspore/ccsrc/dataset/include/dataset/util/status.h @@ -0,0 +1 @@ +../../../util/status.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/include/datasets.h b/mindspore/ccsrc/dataset/include/datasets.h new file mode 100644 index 00000000000..586fff21079 --- /dev/null +++ b/mindspore/ccsrc/dataset/include/datasets.h @@ -0,0 +1,357 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_INCLUDE_DATASETS_H_ +#define DATASET_INCLUDE_DATASETS_H_ + +#include +#include +#include +#include +#include +#include +#include "dataset/include/tensor.h" +#include "dataset/include/iterator.h" +#include "dataset/include/samplers.h" + +namespace mindspore { +namespace dataset { + +// Forward declare +class DatasetOp; +class DataSchema; +class Tensor; +class TensorShape; + +namespace api { + +class TensorOperation; +class SamplerObj; +class ImageFolderDataset; +class MnistDataset; +class BatchDataset; +class RepeatDataset; +class MapDataset; +class ShuffleDataset; +class Cifar10Dataset; +class ProjectDataset; + +/// \brief Function to create an ImageFolderDataset +/// \notes A source dataset that reads images from a tree of directories +/// All images within one folder have the same label +/// The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] decode A flag to decode in ImageFolder +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, +/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \param[in] extensions File extensions to be read +/// \param[in] class_indexing a class name to label map +/// \return Shared pointer to the current ImageFolderDataset +std::shared_ptr ImageFolder(std::string dataset_dir, bool decode = false, + std::shared_ptr sampler = nullptr, + std::set extensions = {}, + std::map class_indexing = {}); + +/// \brief Function to create a MnistDataset +/// \notes The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, +/// A `RandomSampler` will be used to randomly iterate the entire dataset +/// \return Shared pointer to the current MnistDataset +std::shared_ptr Mnist(std::string dataset_dir, std::shared_ptr sampler = nullptr); + +/// \brief Function to create a Cifar10 Dataset +/// \notes The generated dataset has two columns ['image', 'label'] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] num_samples The number of images to be included in the dataset +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, A `RandomSampler` +/// will be used to randomly iterate the entire dataset +/// \return Shared pointer to the current Dataset +std::shared_ptr Cifar10(const std::string &dataset_dir, int32_t num_samples, + std::shared_ptr sampler); + +/// \class Dataset datasets.h +/// \brief A base class to represent a dataset in the data pipeline. +class Dataset : public std::enable_shared_from_this { + public: + friend class Iterator; + + /// \brief Constructor + Dataset(); + + /// \brief Destructor + ~Dataset() = default; + + /// \brief Pure virtual function to convert a Dataset class into a runtime dataset object + /// \return shared pointer to the list of newly created DatasetOps + virtual std::shared_ptr>> Build() = 0; + + /// \brief Pure virtual function for derived class to implement parameters validation + /// \return bool True if all the params are valid + virtual bool ValidateParams() = 0; + + /// \brief Setter function for runtime number of workers + /// \param[in] num_workers The number of threads in this operator + /// \return Shared pointer to the original object + std::shared_ptr SetNumWorkers(int32_t num_workers) { + num_workers_ = num_workers; + return shared_from_this(); + } + + /// \brief Function to create an Iterator over the Dataset pipeline + /// \return Shared pointer to the Iterator + std::shared_ptr CreateIterator(); + + /// \brief Function to create a BatchDataset + /// \notes Combines batch_size number of consecutive rows into batches + /// \param[in] batch_size Path to the root directory that contains the dataset + /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete + /// batch. If true, and if there are less than batch_size rows + /// available to make the last batch, then those rows will + /// be dropped and not propagated to the next node + /// \return Shared pointer to the current BatchDataset + std::shared_ptr Batch(int32_t batch_size, bool drop_remainder = false); + + /// \brief Function to create a RepeatDataset + /// \notes Repeats this dataset count times. Repeat indefinitely if count is -1 + /// \param[in] count Number of times the dataset should be repeated + /// \return Shared pointer to the current Dataset + /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset` + /// due to a limitation in the current implementation + std::shared_ptr Repeat(int32_t count = -1); + + /// \brief Function to create a MapDataset + /// \notes Applies each operation in operations to this dataset + /// \param[in] operations Vector of operations to be applied on the dataset. Operations are + /// applied in the order they appear in this list + /// \param[in] input_columns Vector of the names of the columns that will be passed to the first + /// operation as input. The size of this list must match the number of + /// input columns expected by the first operator. The default input_columns + /// is the first column + /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation + /// This parameter is mandatory if len(input_columns) != len(output_columns) + /// The size of this list must match the number of output columns of the + /// last operation. The default output_columns will have the same + /// name as the input columns, i.e., the columns will be replaced + /// \param[in] project_columns A list of column names to project + /// \return Shared pointer to the current MapDataset + std::shared_ptr Map(std::vector> operations, + std::vector input_columns = {}, + std::vector output_columns = {}, + const std::vector &project_columns = {}); + + /// \brief Function to create a Shuffle Dataset + /// \notes Randomly shuffles the rows of this dataset + /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling + /// \return Shared pointer to the current ShuffleDataset + std::shared_ptr Shuffle(int32_t shuffle_size); + + /// \brief Function to create a Project Dataset + /// \notes Applies project to the dataset + /// \param[in] columns The name of columns to project + /// \return Shared pointer to the current Dataset + std::shared_ptr Project(const std::vector &columns); + + protected: + std::vector> children; + std::shared_ptr parent; + + int32_t num_workers_; + int32_t rows_per_buffer_; + int32_t connector_que_size_; +}; + +/* ####################################### Derived Dataset classes ################################# */ + +/// \class ImageFolderDataset +/// \brief A Dataset derived class to represent ImageFolder dataset +class ImageFolderDataset : public Dataset { + public: + /// \brief Constructor + ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr sampler, bool recursive, + std::set extensions, std::map class_indexing); + + /// \brief Destructor + ~ImageFolderDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + bool decode_; + bool recursive_; + std::shared_ptr sampler_; + std::map class_indexing_; + std::set exts_; +}; + +class MnistDataset : public Dataset { + public: + /// \brief Constructor + MnistDataset(std::string dataset_dir, std::shared_ptr sampler); + + /// \brief Destructor + ~MnistDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + std::shared_ptr sampler_; +}; + +class BatchDataset : public Dataset { + public: + /// \brief Constructor + BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector cols_to_map, + std::map>> pad_map); + + /// \brief Destructor + ~BatchDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + int32_t batch_size_; + bool drop_remainder_; + bool pad_; + std::vector cols_to_map_; + std::map>> pad_map_; +}; + +class RepeatDataset : public Dataset { + public: + /// \brief Constructor + explicit RepeatDataset(uint32_t count); + + /// \brief Destructor + ~RepeatDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + uint32_t repeat_count_; +}; + +class ShuffleDataset : public Dataset { + public: + ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch); + + ~ShuffleDataset() = default; + + std::shared_ptr>> Build() override; + + bool ValidateParams() override; + + private: + int32_t shuffle_size_; + uint32_t shuffle_seed_; + bool reset_every_epoch_; +}; + +class MapDataset : public Dataset { + public: + /// \brief Constructor + MapDataset(std::vector> operations, std::vector input_columns = {}, + std::vector output_columns = {}, const std::vector &columns = {}); + + /// \brief Destructor + ~MapDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::vector> operations_; + std::vector input_columns_; + std::vector output_columns_; + std::vector project_columns_; +}; + +class Cifar10Dataset : public Dataset { + public: + /// \brief Constructor + Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr sampler); + + /// \brief Destructor + ~Cifar10Dataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::string dataset_dir_; + int32_t num_samples_; + std::shared_ptr sampler_; +}; + +class ProjectDataset : public Dataset { + public: + /// \brief Constructor + explicit ProjectDataset(const std::vector &columns); + + /// \brief Destructor + ~ProjectDataset() = default; + + /// \brief a base class override function to create the required runtime dataset op objects for this class + /// \return shared pointer to the list of newly created DatasetOps + std::shared_ptr>> Build() override; + + /// \brief Parameters validation + /// \return bool true if all the params are valid + bool ValidateParams() override; + + private: + std::vector columns_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_INCLUDE_DATASETS_H_ diff --git a/mindspore/ccsrc/dataset/include/iterator.h b/mindspore/ccsrc/dataset/include/iterator.h new file mode 100644 index 00000000000..1c78031771e --- /dev/null +++ b/mindspore/ccsrc/dataset/include/iterator.h @@ -0,0 +1,115 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_INCLUDE_ITERATOR_H_ +#define DATASET_INCLUDE_ITERATOR_H_ + +#include +#include +#include +#include +#include "dataset/include/status.h" + +namespace mindspore { +namespace dataset { + +// Forward declare +class ExecutionTree; +class DatasetIterator; +class DatasetOp; +class Tensor; + +namespace api { + +class Dataset; + +using TensorMap = std::unordered_map>; + +// Abstract class for iterating over the dataset. +class Iterator { + public: + /// \brief Constructor + Iterator() = default; + + /// \brief Destructor + ~Iterator() = default; + + /// \brief Method for building and launching the pipeline. + /// \param[in] ops - a vector of DatasetOp in the data pipeline. + /// \return - a Status error code, returns OK if no error encountered. + Status BuildAndLaunchTree(std::shared_ptr ds); + + /// \brief Function to get the next row from the data pipeline. + /// \param[out] row - the output tensor row. + void GetNextRow(TensorMap *row); + + /// \brief Function to shut down the data pipeline. + void Stop(); + + class _Iterator { + public: + explicit _Iterator(Iterator *lt) : lt_{lt}, cur_row_{nullptr} { + if (lt_) { + cur_row_ = new TensorMap(); + lt_->GetNextRow(cur_row_); + } + } + + // Destructor + ~_Iterator() { + if (cur_row_) { + delete cur_row_; + } + } + + _Iterator &operator++() { + if (lt_) { + ++ind_; + lt_->GetNextRow(cur_row_); + } + if (cur_row_ && cur_row_->size() == 0) { + delete cur_row_; + cur_row_ = nullptr; + } + return *this; + } // prefix ++ overload + TensorMap &operator*() { return *cur_row_; } // dereference operator + TensorMap *operator->() { return cur_row_; } + + bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; } + + private: + int ind_; // the cur node our Iterator points to + Iterator *lt_; + TensorMap *cur_row_; + }; + + _Iterator begin() { return _Iterator(this); } + + _Iterator end() { return _Iterator(nullptr); } + + private: + // Runtime tree. + // Use shared_ptr instead of unique_ptr because the DatasetIterator constructor takes in a shared_ptr type. + std::shared_ptr tree_; + + // Runtime iterator + std::unique_ptr iterator_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_INCLUDE_ITERATOR_H_ diff --git a/mindspore/ccsrc/dataset/include/samplers.h b/mindspore/ccsrc/dataset/include/samplers.h new file mode 100644 index 00000000000..3d57e67059c --- /dev/null +++ b/mindspore/ccsrc/dataset/include/samplers.h @@ -0,0 +1,199 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_API_SAMPLERS_H_ +#define DATASET_API_SAMPLERS_H_ + +#include +#include + +namespace mindspore { +namespace dataset { + +// Internal Sampler class forward declaration +class Sampler; + +namespace api { + +class SamplerObj : public std::enable_shared_from_this { + public: + SamplerObj(); + + ~SamplerObj() = default; + + virtual std::shared_ptr Build() = 0; + virtual bool ValidateParams() = 0; +}; + +class DistributedSamplerObj; +class PKSamplerObj; +class RandomSamplerObj; +class SequentialSamplerObj; +class SubsetRandomSamplerObj; +class WeightedRandomSamplerObj; + +/// Function to create a Distributed Sampler. +/// \notes A Sampler that access a shard of the dataset. +/// \param[in] num_shards - Number of shards to divide the dataset into. +/// \param[in] shard_id - Shard ID of the current shard within num_shards. +/// \param[in] shuffle - If true, the indices are shuffled. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \param[in] seed - The seed in use when shuffle is true. +/// \return Shared pointer to the current Sampler. +std::shared_ptr DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, + int64_t num_samples = 0, uint32_t seed = 1); + +/// Function to create a PK Sampler. +/// \notes Samples K elements for each P class in the dataset. +/// This will sample all classes. +/// \param[in] num_val - Number of elements to sample for each class. +/// \param[in] shuffle - If true, the class IDs are shuffled. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); + +/// Function to create a Random Sampler. +/// \notes Samples the elements randomly. +/// \param[in] replacement - If True, put the sample ID back for the next draw. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr RandomSampler(bool replacement = false, int64_t num_samples = 0); + +/// Function to create a Sequential Sampler. +/// \notes Samples the dataset elements sequentially, same as not having a sampler. +/// \param[in] start_index - Index to start sampling at (dafault to start at first id). +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); + +/// Function to create a Subset Random Sampler. +/// \notes Samples the elements randomly from a sequence of indices. +/// \param[in] indices - A vector sequence of indices. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr SubsetRandomSampler(const std::vector &indices, + int64_t num_samples = 0); + +/// Function to create a Weighted Random Sampler. +/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given +/// weights (probabilities). +/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \param[in] replacement - If True, put the sample ID back for the next draw. +/// \return Shared pointer to the current Sampler. +std::shared_ptr WeightedRandomSampler(const std::vector &weights, + int64_t num_samples = 0, bool replacement = true); + +/* ####################################### Derived Sampler classes ################################# */ +class DistributedSamplerObj : public SamplerObj { + public: + DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed); + + ~DistributedSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int64_t num_shards_; + int64_t shard_id_; + bool shuffle_; + int64_t num_samples_; + uint32_t seed_; +}; + +class PKSamplerObj : public SamplerObj { + public: + PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples); + + ~PKSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int64_t num_val_; + bool shuffle_; + int64_t num_samples_; +}; + +class RandomSamplerObj : public SamplerObj { + public: + RandomSamplerObj(bool replacement, int64_t num_samples); + + ~RandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + bool replacement_; + int64_t num_samples_; +}; + +class SequentialSamplerObj : public SamplerObj { + public: + SequentialSamplerObj(int64_t start_index, int64_t num_samples); + + ~SequentialSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int64_t start_index_; + int64_t num_samples_; +}; + +class SubsetRandomSamplerObj : public SamplerObj { + public: + SubsetRandomSamplerObj(const std::vector &indices, int64_t num_samples); + + ~SubsetRandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + const std::vector &indices_; + int64_t num_samples_; +}; + +class WeightedRandomSamplerObj : public SamplerObj { + public: + explicit WeightedRandomSamplerObj(const std::vector &weights, int64_t num_samples = 0, + bool replacement = true); + + ~WeightedRandomSamplerObj() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + const std::vector &weights_; + int64_t num_samples_; + bool replacement_; +}; +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_API_SAMPLERS_H_ diff --git a/mindspore/ccsrc/dataset/include/status.h b/mindspore/ccsrc/dataset/include/status.h new file mode 120000 index 00000000000..bba92b63ad9 --- /dev/null +++ b/mindspore/ccsrc/dataset/include/status.h @@ -0,0 +1 @@ +../util/status.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/include/tensor.h b/mindspore/ccsrc/dataset/include/tensor.h new file mode 120000 index 00000000000..34b5e020a9f --- /dev/null +++ b/mindspore/ccsrc/dataset/include/tensor.h @@ -0,0 +1 @@ +../core/tensor.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/include/transforms.h b/mindspore/ccsrc/dataset/include/transforms.h new file mode 100644 index 00000000000..c3a1540ae87 --- /dev/null +++ b/mindspore/ccsrc/dataset/include/transforms.h @@ -0,0 +1,380 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_API_TRANSFORMS_H_ +#define DATASET_API_TRANSFORMS_H_ + +#include +#include +#include "dataset/core/constants.h" + +namespace mindspore { +namespace dataset { + +class TensorOp; + +namespace api { +// Abstract class to represent a dataset in the data pipeline. +class TensorOperation : public std::enable_shared_from_this { + public: + /// \brief Constructor + TensorOperation(); + + /// \brief Destructor + ~TensorOperation() = default; + + /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object. + /// \return shared pointer to the newly created TensorOp. + virtual std::shared_ptr Build() = 0; + + virtual bool ValidateParams() = 0; +}; + +// Transform operations for performing computer vision. +namespace vision { + +class NormalizeOperation; +class DecodeOperation; +class ResizeOperation; +class RandomCropOperation; +class CenterCropOperation; +class UniformAugOperation; +class RandomHorizontalFlipOperation; +class RandomVerticalFlipOperation; +class RandomRotationOperation; +class PadOperation; +class CutOutOperation; +class RandomColorAdjustOperation; + +/// \brief Function to create a Normalize TensorOperation. +/// \notes Normalize the input image with respect to mean and standard deviation. +/// \param[in] mean - a vector of mean values for each channel, w.r.t channel order. +/// \param[in] std - a vector of standard deviations for each channel, w.r.t. channel order. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Normalize(std::vector mean, std::vector std); + +/// \brief Function to create a Decode TensorOperation. +/// \notes Decode the input image in RGB mode. +/// \param[in] rgb - a boolean of whether to decode in RGB mode or not. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Decode(bool rgb = true); + +/// \brief Function to create a Resize TensorOperation. +/// \notes Resize the input image to the given size.. +/// \param[in] size - a vector representing the output size of the resized image. +/// If size is a single value, the image will be resized to this value with +/// the same image aspect ratio. If size has 2 values, it should be (height, width). +/// \param[in] interpolation An enum for the mode of interpolation +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Resize(std::vector size, + InterpolationMode interpolation = InterpolationMode::kLinear); + +/// \brief Function to create a RandomCrop TensorOperation. +/// \notes Crop the input image at a random location. +/// \param[in] size - a vector representing the output size of the cropped image. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \param[in] padding - a vector with the value of pixels to pad the image. If 4 values are provided, +/// it pads the left, top, right and bottom respectively. +/// \param[in] pad_if_needed - a boolean whether to pad the image if either side is smaller than +/// the given output size. +/// \param[in] fill_value - a vector representing the pixel intensity of the borders, it is used to +/// fill R, G, B channels respectively. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomCrop(std::vector size, std::vector padding = {0, 0, 0, 0}, + bool pad_if_needed = false, + std::vector fill_value = {0, 0, 0}); + +/// \brief Function to create a CenterCrop TensorOperation. +/// \notes Crops the input image at the center to the given size. +/// \param[in] size - a vector representing the output size of the cropped image. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr CenterCrop(std::vector size); + +/// \brief Function to create a UniformAugment TensorOperation. +/// \notes Tensor operation to perform randomly selected augmentation. +/// \param[in] operations - a vector of TensorOperation operations. +/// \param[in] num_ops - integer representing the number of OPs to be selected and applied. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr UniformAugment(std::vector> operations, + int32_t num_ops = 2); + +/// \brief Function to create a RandomHorizontalFlip TensorOperation. +/// \notes Tensor operation to perform random horizontal flip. +/// \param[in] prob - float representing the probability of flip. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomHorizontalFlip(float prob = 0.5); + +/// \brief Function to create a RandomVerticalFlip TensorOperation. +/// \notes Tensor operation to perform random vertical flip. +/// \param[in] prob - float representing the probability of flip. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomVerticalFlip(float prob = 0.5); + +/// \brief Function to create a RandomRotation TensorOp +/// \notes Rotates the image according to parameters +/// \param[in] degrees A float vector size 2, representing the starting and ending degree +/// \param[in] resample An enum for the mode of interpolation +/// \param[in] expand A boolean representing whether the image is expanded after rotation +/// \param[in] center A float vector size 2, representing the x and y center of rotation. +/// \param[in] fill_value A uint8_t vector size 3, representing the rgb value of the fill color +/// \return Shared pointer to the current TensorOp +std::shared_ptr RandomRotation( + std::vector degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false, + std::vector center = {-1, -1}, std::vector fill_value = {0, 0, 0}); + +/// \brief Function to create a Pad TensorOp +/// \notes Pads the image according to padding parameters +/// \param[in] padding A vector representing the number of pixels to pad the image +/// If vector has one value, it pads all sides of the image with that value +/// If vector has two values, it pads left and right with the first and +/// top and bottom with the second value +/// If vector has four values, it pads left, top, right, and bottom with +/// those values respectively +/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is +/// BorderType.kConstant. If 3 values are provided, +/// it is used to fill R, G, B channels respectively +/// \param[in] padding_mode The method of padding (default=BorderType.kConstant) +/// Can be any of +/// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric] +/// - BorderType.kConstant, means it fills the border with constant values +/// - BorderType.kEdge, means it pads with the last value on the edge +/// - BorderType.kReflect, means it reflects the values on the edge omitting the last value of edge +/// - BorderType.kSymmetric, means it reflects the values on the edge repeating the last value of edge +/// \return Shared pointer to the current TensorOp +std::shared_ptr Pad(std::vector padding, std::vector fill_value = {0}, + BorderType padding_mode = BorderType::kConstant); + +/// \brief Function to create a CutOut TensorOp +/// \notes Randomly cut (mask) out a given number of square patches from the input image +/// \param[in] length Integer representing the side length of each square patch +/// \param[in] num_patches Integer representing the number of patches to be cut out of an image +/// \return Shared pointer to the current TensorOp +std::shared_ptr CutOut(int32_t length, int32_t num_patches = 1); + +/// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image +/// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} +/// \param[in] hue Brightness adjustment factor. Must be a vector of one or two values +/// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5 +/// Default value is {0, 0} +/// \return Shared pointer to the current TensorOp +std::shared_ptr RandomColorAdjust(std::vector brightness = {1.0, 1.0}, + std::vector contrast = {1.0, 1.0}, + std::vector saturation = {1.0, 1.0}, + std::vector hue = {0.0, 0.0}); + +/* ####################################### Derived TensorOperation classes ################################# */ + +class NormalizeOperation : public TensorOperation { + public: + NormalizeOperation(std::vector mean, std::vector std); + + ~NormalizeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector mean_; + std::vector std_; +}; + +class DecodeOperation : public TensorOperation { + public: + explicit DecodeOperation(bool rgb = true); + + ~DecodeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + bool rgb_; +}; + +class ResizeOperation : public TensorOperation { + public: + explicit ResizeOperation(std::vector size, + InterpolationMode interpolation_mode = InterpolationMode::kLinear); + + ~ResizeOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; + InterpolationMode interpolation_; +}; + +class RandomCropOperation : public TensorOperation { + public: + RandomCropOperation(std::vector size, std::vector padding = {0, 0, 0, 0}, + bool pad_if_needed = false, std::vector fill_value = {0, 0, 0}); + + ~RandomCropOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; + std::vector padding_; + bool pad_if_needed_; + std::vector fill_value_; +}; + +class CenterCropOperation : public TensorOperation { + public: + explicit CenterCropOperation(std::vector size); + + ~CenterCropOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector size_; +}; + +class UniformAugOperation : public TensorOperation { + public: + explicit UniformAugOperation(std::vector> operations, int32_t num_ops = 2); + + ~UniformAugOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector> operations_; + int32_t num_ops_; +}; + +class RandomHorizontalFlipOperation : public TensorOperation { + public: + explicit RandomHorizontalFlipOperation(float probability = 0.5); + + ~RandomHorizontalFlipOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + float probability_; +}; + +class RandomVerticalFlipOperation : public TensorOperation { + public: + explicit RandomVerticalFlipOperation(float probability = 0.5); + + ~RandomVerticalFlipOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + float probability_; +}; + +class RandomRotationOperation : public TensorOperation { + public: + RandomRotationOperation(std::vector degrees, InterpolationMode interpolation_mode, bool expand, + std::vector center, std::vector fill_value); + + ~RandomRotationOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector degrees_; + InterpolationMode interpolation_mode_; + std::vector center_; + bool expand_; + std::vector fill_value_; +}; + +class PadOperation : public TensorOperation { + public: + PadOperation(std::vector padding, std::vector fill_value = {0}, + BorderType padding_mode = BorderType::kConstant); + + ~PadOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector padding_; + std::vector fill_value_; + BorderType padding_mode_; +}; + +class CutOutOperation : public TensorOperation { + public: + explicit CutOutOperation(int32_t length, int32_t num_patches = 1); + + ~CutOutOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + int32_t length_; + int32_t num_patches_; +}; + +class RandomColorAdjustOperation : public TensorOperation { + public: + RandomColorAdjustOperation(std::vector brightness = {1.0, 1.0}, std::vector contrast = {1.0, 1.0}, + std::vector saturation = {1.0, 1.0}, std::vector hue = {0.0, 0.0}); + + ~RandomColorAdjustOperation() = default; + + std::shared_ptr Build() override; + + bool ValidateParams() override; + + private: + std::vector brightness_; + std::vector contrast_; + std::vector saturation_; + std::vector hue_; +}; +} // namespace vision +} // namespace api +} // namespace dataset +} // namespace mindspore +#endif // DATASET_API_TRANSFORMS_H_ diff --git a/mindspore/ccsrc/dataset/include/utils/log_adapter.h b/mindspore/ccsrc/dataset/include/utils/log_adapter.h new file mode 120000 index 00000000000..5cecc459381 --- /dev/null +++ b/mindspore/ccsrc/dataset/include/utils/log_adapter.h @@ -0,0 +1 @@ +../../../utils/log_adapter.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/include/utils/overload.h b/mindspore/ccsrc/dataset/include/utils/overload.h new file mode 120000 index 00000000000..d163e527483 --- /dev/null +++ b/mindspore/ccsrc/dataset/include/utils/overload.h @@ -0,0 +1 @@ +../../../utils/overload.h \ No newline at end of file diff --git a/mindspore/ccsrc/dataset/kernels/CMakeLists.txt b/mindspore/ccsrc/dataset/kernels/CMakeLists.txt index 2ebdd15e3c9..8a9096ff23d 100644 --- a/mindspore/ccsrc/dataset/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/dataset/kernels/CMakeLists.txt @@ -2,7 +2,13 @@ add_subdirectory(image) add_subdirectory(data) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) -add_library(kernels OBJECT - py_func_op.cc - tensor_op.cc) -target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) +if (ENABLE_PYTHON) + add_library(kernels OBJECT + py_func_op.cc + tensor_op.cc) + target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) +else() + add_library(kernels OBJECT + tensor_op.cc) +endif() + diff --git a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc b/mindspore/ccsrc/dataset/kernels/data/data_utils.cc index 8dd5a159394..91165dedf3e 100644 --- a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc +++ b/mindspore/ccsrc/dataset/kernels/data/data_utils.cc @@ -23,7 +23,9 @@ #include "dataset/core/constants.h" #include "dataset/core/data_type.h" +#ifdef ENABLE_PYTHON #include "dataset/core/pybind_support.h" +#endif #include "dataset/core/tensor.h" #include "dataset/core/tensor_shape.h" #include "dataset/kernels/data/type_cast_op.h" diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc index a852a450141..27d380511c6 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc @@ -729,7 +729,6 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output int num_channels = input_cv->shape()[2]; if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2); *output = std::static_pointer_cast(output_cv); - return Status::OK(); } catch (const cv::Exception &e) { RETURN_STATUS_UNEXPECTED("Unexpected error in pad"); diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/dataset/kernels/image/image_utils.h index 6ebd13ad551..212d81f7fc6 100644 --- a/mindspore/ccsrc/dataset/kernels/image/image_utils.h +++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.h @@ -35,10 +35,6 @@ namespace mindspore { namespace dataset { -enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; - -enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; - void JpegErrorExitCustom(j_common_ptr cinfo); struct JpegErrorManagerCustom { diff --git a/mindspore/ccsrc/dataset/kernels/image/pad_op.cc b/mindspore/ccsrc/dataset/kernels/image/pad_op.cc index b4d9c2bbf0b..baeceeed779 100644 --- a/mindspore/ccsrc/dataset/kernels/image/pad_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/pad_op.cc @@ -16,6 +16,7 @@ #include "dataset/kernels/image/pad_op.h" #include "dataset/kernels/image/image_utils.h" +#include "dataset/core/constants.h" #include "dataset/util/status.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/kernels/image/pad_op.h b/mindspore/ccsrc/dataset/kernels/image/pad_op.h index 76d99d01626..e0725c84ca9 100644 --- a/mindspore/ccsrc/dataset/kernels/image/pad_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/pad_op.h @@ -21,7 +21,7 @@ #include "dataset/core/tensor.h" #include "dataset/kernels/tensor_op.h" -#include "dataset/kernels/image/image_utils.h" +#include "dataset/core/constants.h" #include "dataset/util/status.h" namespace mindspore { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc index 7c0fe82fc75..cf8a4640ff9 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc +++ b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.cc @@ -18,7 +18,6 @@ #include "dataset/kernels/image/image_utils.h" #include "dataset/util/status.h" #include "dataset/core/cv_tensor.h" -#include "dataset/core/pybind_support.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h index 06c96e11ae9..f208aabd025 100644 --- a/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h +++ b/mindspore/ccsrc/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h @@ -16,8 +16,6 @@ #ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ #define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_ -#include -#include #include #include #include @@ -26,8 +24,6 @@ #include "dataset/kernels/tensor_op.h" #include "dataset/util/random.h" #include "dataset/util/status.h" -#include "pybind11/pybind11.h" -#include "pybind11/stl_bind.h" namespace mindspore { namespace dataset { diff --git a/mindspore/ccsrc/dataset/text/kernels/ngram_op.h b/mindspore/ccsrc/dataset/text/kernels/ngram_op.h index 3d2c547f793..7804f2f0ce7 100644 --- a/mindspore/ccsrc/dataset/text/kernels/ngram_op.h +++ b/mindspore/ccsrc/dataset/text/kernels/ngram_op.h @@ -27,7 +27,6 @@ namespace mindspore { namespace dataset { -namespace py = pybind11; class NgramOp : public TensorOp { public: diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index e4d52f6eeec..e7c0869f1f4 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -32,7 +32,15 @@ if(ENABLE_MINDDATA) endif() # fetch ut test files if(ENABLE_MINDDATA) - file(GLOB_RECURSE UT_SRCS ./*.cc) + file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./*.cc) + if(NOT ENABLE_PYTHON) + set(PYTHON_RELATED_SRCS + dataset/filter_op_test.cc + dataset/voc_op_test.cc + dataset/manifest_op_test.cc + ) + list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS}) + endif() else() file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc) foreach(OBJ ${TEMP_UT_SRCS}) diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index ab4b307e450..9a2e790d2b8 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -90,6 +90,7 @@ SET(DE_UT_SRCS concatenate_op_test.cc cyclic_array_test.cc perf_data_test.cc + c_api_test.cc ) add_executable(de_ut_tests ${DE_UT_SRCS}) diff --git a/tests/ut/cpp/dataset/c_api_test.cc b/tests/ut/cpp/dataset/c_api_test.cc new file mode 100644 index 00000000000..7a3b6d552b9 --- /dev/null +++ b/tests/ut/cpp/dataset/c_api_test.cc @@ -0,0 +1,771 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include "utils/log_adapter.h" +#include "common/utils.h" +#include "common/common.h" +#include "gtest/gtest.h" +#include "securec.h" +#include "dataset/include/datasets.h" +#include "dataset/include/status.h" +#include "dataset/include/transforms.h" +#include "dataset/include/iterator.h" +#include "dataset/core/constants.h" +#include "dataset/include/samplers.h" + +using namespace mindspore::dataset::api; +using mindspore::MsLogLevel::ERROR; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::LogStream; +using mindspore::dataset::Tensor; +using mindspore::dataset::Status; +using mindspore::dataset::BorderType; + + +class MindDataTestPipeline : public UT::DatasetOpTesting { + protected: +}; + + +TEST_F(MindDataTestPipeline, TestBatchAndRepeat) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 20)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr resize_op = vision::Resize({30, 30}); + EXPECT_TRUE(resize_op != nullptr); + + std::shared_ptr center_crop_op = vision::CenterCrop({16, 16}); + EXPECT_TRUE(center_crop_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({resize_op, center_crop_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 40); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestUniformAugWithOps) { + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, RandomSampler(false, 20)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 1; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr resize_op = vision::Resize({30, 30}); + EXPECT_TRUE(resize_op != nullptr); + + std::shared_ptr random_crop_op = vision::RandomCrop({28, 28}); + EXPECT_TRUE(random_crop_op != nullptr); + + std::shared_ptr center_crop_op = vision::CenterCrop({16, 16}); + EXPECT_TRUE(center_crop_op != nullptr); + + std::shared_ptr uniform_aug_op = vision::UniformAugment({random_crop_op, center_crop_op}, 2); + EXPECT_TRUE(uniform_aug_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({resize_op, uniform_aug_op}); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomFlip) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_vertical_flip_op = vision::RandomVerticalFlip(0.5); + EXPECT_TRUE(random_vertical_flip_op != nullptr); + + std::shared_ptr random_horizontal_flip_op = vision::RandomHorizontalFlip(0.5); + EXPECT_TRUE(random_horizontal_flip_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_vertical_flip_op, random_horizontal_flip_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { + std::shared_ptr sampl = DistributedSampler(2, 1); + EXPECT_NE(sampl, nullptr); + + sampl = PKSampler(3); + EXPECT_NE(sampl, nullptr); + + sampl = RandomSampler(false, 12); + EXPECT_NE(sampl, nullptr); + + sampl = SequentialSampler(0, 12); + EXPECT_NE(sampl, nullptr); + + std::vector weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1}; + sampl = WeightedRandomSampler(weights, 12); + EXPECT_NE(sampl, nullptr); + + std::vector indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23}; + sampl = SubsetRandomSampler(indices); + EXPECT_NE(sampl, nullptr); + + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, sampl); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 12); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestPad) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr pad_op1 = vision::Pad({1, 2, 3, 4}, {0}, BorderType::kSymmetric); + EXPECT_TRUE(pad_op1 != nullptr); + + std::shared_ptr pad_op2 = vision::Pad({1}, {1, 1, 1}, BorderType::kEdge); + EXPECT_TRUE(pad_op2 != nullptr); + + std::shared_ptr pad_op3 = vision::Pad({1, 4}); + EXPECT_TRUE(pad_op3 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({pad_op1, pad_op2, pad_op3}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestCutOut) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr cut_out1 = vision::CutOut(30, 5); + EXPECT_TRUE(cut_out1!= nullptr); + + std::shared_ptr cut_out2 = vision::CutOut(30); + EXPECT_TRUE(cut_out2 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({cut_out1, cut_out2}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestNormalize) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr normalize = vision::Normalize({121.0, 115.0, 100.0}, {70.0, 68.0, 71.0}); + EXPECT_TRUE(normalize != nullptr); + + // Create a Map operation on ds + ds = ds->Map({normalize}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestDecode) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, false, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr decode = vision::Decode(true); + EXPECT_TRUE(decode != nullptr); + + // Create a Map operation on ds + ds = ds->Map({decode}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + EXPECT_EQ(i, 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestShuffleDataset) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Shuffle operation on ds + int32_t shuffle_size = 10; + ds = ds->Shuffle(shuffle_size); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestCifar10Dataset) { + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, 0, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 2; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 10); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomColorAdjust) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_color_adjust1 = vision::RandomColorAdjust({1.0}, {0.0}, {0.5}, {0.5}); + EXPECT_TRUE(random_color_adjust1 != nullptr); + + std::shared_ptr random_color_adjust2 = vision::RandomColorAdjust({1.0, 1.0}, {0.0, 0.0}, {0.5, 0.5}, + {0.5, 0.5}); + EXPECT_TRUE(random_color_adjust2 != nullptr); + + std::shared_ptr random_color_adjust3 = vision::RandomColorAdjust({0.5, 1.0}, {0.0, 0.5}, {0.25, 0.5}, + {0.25, 0.5}); + EXPECT_TRUE(random_color_adjust3 != nullptr); + + std::shared_ptr random_color_adjust4 = vision::RandomColorAdjust(); + EXPECT_TRUE(random_color_adjust4 != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_color_adjust1, random_color_adjust2, random_color_adjust3, random_color_adjust4}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestRandomRotation) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_rotation_op = vision::RandomRotation({-180, 180}); + EXPECT_TRUE(random_rotation_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_rotation_op}); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestProjectMap) { + // Create an ImageFolder Dataset + std::string folder_path = datasets_root_path_ + "/testPK/data/"; + std::shared_ptr ds = ImageFolder(folder_path, true, RandomSampler(false, 10)); + EXPECT_TRUE(ds != nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_TRUE(ds != nullptr); + + // Create objects for the tensor ops + std::shared_ptr random_vertical_flip_op = vision::RandomVerticalFlip(0.5); + EXPECT_TRUE(random_vertical_flip_op != nullptr); + + // Create a Map operation on ds + ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"}); + EXPECT_TRUE(ds != nullptr); + + // Create a Project operation on ds + std::vector column_project = {"label"}; + ds = ds->Project(column_project); + EXPECT_TRUE(ds != nullptr); + + // Create a Batch operation on ds + int32_t batch_size = 1; + ds = ds->Batch(batch_size); + EXPECT_TRUE(ds != nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_TRUE(iter != nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_TRUE(i == 20); + + // Manually terminate the pipeline + iter->Stop(); +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/datatype_test.cc b/tests/ut/cpp/dataset/datatype_test.cc index a55853c4c59..8cb22102283 100644 --- a/tests/ut/cpp/dataset/datatype_test.cc +++ b/tests/ut/cpp/dataset/datatype_test.cc @@ -23,8 +23,6 @@ using namespace mindspore::dataset; -namespace py = pybind11; - class MindDataTestDatatype : public UT::Common { public: MindDataTestDatatype() = default;