From d233c541396f5b176eb20416d72997d6cc0038b6 Mon Sep 17 00:00:00 2001 From: Zirui Wu Date: Tue, 7 Jul 2020 15:58:46 -0400 Subject: [PATCH] implemented cpp random choice, apply and compos python part of random ops added random select sub policy validators added comments added, remaining issues addressed add more python test cases fix ci fix CI fix order of include files addr review cmts addr review cmts reorg file fix compile err address review cmts address review cmts --- .../ccsrc/minddata/dataset/api/de_pipeline.cc | 2 - .../minddata/dataset/api/python_bindings.cc | 83 +++++++++++++++- .../minddata/dataset/kernels/CMakeLists.txt | 7 +- .../minddata/dataset/kernels/compose_op.cc | 66 +++++++++++++ .../minddata/dataset/kernels/compose_op.h | 70 +++++++++++++ .../dataset/kernels/image/CMakeLists.txt | 1 + .../image/random_select_subpolicy_op.cc | 96 ++++++++++++++++++ .../image/random_select_subpolicy_op.h | 79 +++++++++++++++ .../dataset/kernels/random_apply_op.cc | 68 +++++++++++++ .../dataset/kernels/random_apply_op.h | 79 +++++++++++++++ .../dataset/kernels/random_choice_op.cc | 97 +++++++++++++++++++ .../dataset/kernels/random_choice_op.h | 77 +++++++++++++++ .../minddata/dataset/kernels/tensor_op.h | 4 + mindspore/dataset/core/validator_helpers.py | 8 ++ mindspore/dataset/transforms/c_transforms.py | 51 +++++++++- mindspore/dataset/transforms/validators.py | 22 ++++- .../dataset/transforms/vision/c_transforms.py | 8 +- .../dataset/transforms/vision/validators.py | 25 ++++- tests/ut/python/dataset/test_c_compose.py | 50 ++++++++++ .../ut/python/dataset/test_c_random_apply.py | 48 +++++++++ .../ut/python/dataset/test_c_random_choice.py | 48 +++++++++ .../dataset/test_random_select_subpolicy.py | 51 ++++++++++ 22 files changed, 1029 insertions(+), 11 deletions(-) create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/compose_op.cc create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/compose_op.h create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/image/random_select_subpolicy_op.cc create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/image/random_select_subpolicy_op.h create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/random_apply_op.cc create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/random_apply_op.h create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/random_choice_op.cc create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/random_choice_op.h create mode 100644 tests/ut/python/dataset/test_c_compose.py create mode 100644 tests/ut/python/dataset/test_c_random_apply.py create mode 100644 tests/ut/python/dataset/test_c_random_choice.py create mode 100644 tests/ut/python/dataset/test_random_select_subpolicy.py diff --git a/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc index 2a6166f868c..c780d8f645b 100644 --- a/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc +++ b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc @@ -42,8 +42,6 @@ #include "minddata/dataset/util/status.h" #include "minddata/mindrecord/include/shard_category.h" #include "minddata/mindrecord/include/shard_distributed_sample.h" -#include "minddata/mindrecord/include/shard_sample.h" -#include "minddata/mindrecord/include/shard_shuffle.h" #include "pybind11/stl.h" #include "utils/log_adapter.h" diff --git a/mindspore/ccsrc/minddata/dataset/api/python_bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python_bindings.cc index 145291ec3be..173c1af2f22 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python_bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python_bindings.cc @@ -16,6 +16,7 @@ #include #include "minddata/dataset/api/de_pipeline.h" +#include "minddata/dataset/engine/cache/cache_client.h" #include "minddata/dataset/engine/datasetops/source/cifar_op.h" #include "minddata/dataset/engine/datasetops/source/clue_op.h" #include "minddata/dataset/engine/datasetops/source/coco_op.h" @@ -35,9 +36,9 @@ #include "minddata/dataset/engine/datasetops/source/text_file_op.h" #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" #include "minddata/dataset/engine/datasetops/source/voc_op.h" -#include "minddata/dataset/engine/cache/cache_client.h" #include "minddata/dataset/engine/gnn/graph.h" #include "minddata/dataset/engine/jagged_connector.h" +#include "minddata/dataset/kernels/compose_op.h" #include "minddata/dataset/kernels/data/concatenate_op.h" #include "minddata/dataset/kernels/data/duplicate_op.h" #include "minddata/dataset/kernels/data/fill_op.h" @@ -61,11 +62,12 @@ #include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h" #include "minddata/dataset/kernels/image/random_crop_op.h" #include "minddata/dataset/kernels/image/random_crop_with_bbox_op.h" -#include "minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h" #include "minddata/dataset/kernels/image/random_horizontal_flip_op.h" +#include "minddata/dataset/kernels/image/random_horizontal_flip_with_bbox_op.h" #include "minddata/dataset/kernels/image/random_resize_op.h" #include "minddata/dataset/kernels/image/random_resize_with_bbox_op.h" #include "minddata/dataset/kernels/image/random_rotation_op.h" +#include "minddata/dataset/kernels/image/random_select_subpolicy_op.h" #include "minddata/dataset/kernels/image/random_vertical_flip_op.h" #include "minddata/dataset/kernels/image/random_vertical_flip_with_bbox_op.h" #include "minddata/dataset/kernels/image/rescale_op.h" @@ -74,6 +76,9 @@ #include "minddata/dataset/kernels/image/resize_with_bbox_op.h" #include "minddata/dataset/kernels/image/uniform_aug_op.h" #include "minddata/dataset/kernels/no_op.h" +#include "minddata/dataset/kernels/py_func_op.h" +#include "minddata/dataset/kernels/random_apply_op.h" +#include "minddata/dataset/kernels/random_choice_op.h" #include "minddata/dataset/text/kernels/jieba_tokenizer_op.h" #include "minddata/dataset/text/kernels/lookup_op.h" #include "minddata/dataset/text/kernels/ngram_op.h" @@ -88,6 +93,7 @@ #include "minddata/mindrecord/include/shard_sample.h" #include "minddata/mindrecord/include/shard_sequential_sample.h" #include "mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.h" + #include "pybind11/pybind11.h" #include "pybind11/stl.h" #include "pybind11/stl_bind.h" @@ -113,6 +119,24 @@ namespace dataset { if (rc.IsError()) throw std::runtime_error(rc.ToString()); \ } while (false) +Status PyListToTensorOps(const py::list &py_ops, std::vector> *ops) { + RETURN_UNEXPECTED_IF_NULL(ops); + for (auto op : py_ops) { + if (py::isinstance(op)) { + ops->emplace_back(op.cast>()); + } else if (py::isinstance(op)) { + ops->emplace_back(std::make_shared(op.cast())); + } else { + RETURN_STATUS_UNEXPECTED("element is neither a TensorOp nor a pyfunc."); + } + } + CHECK_FAIL_RETURN_UNEXPECTED(!ops->empty(), "TensorOp list is empty."); + for (auto const &op : *ops) { + RETURN_UNEXPECTED_IF_NULL(op); + } + return Status::OK(); +} + void bindDEPipeline(py::module *m) { (void)py::class_(*m, "DEPipeline") .def(py::init<>()) @@ -623,7 +647,7 @@ void bindTokenizerOps(py::module *m) { WordIdType default_id = vocab->Lookup(word); if (default_id == Vocab::kNoTokenExists) { THROW_IF_ERROR( - Status(StatusCode::kUnexpectedError, "default unknown token:" + word + " doesn't exist in vocab.")); + Status(StatusCode::kUnexpectedError, "default unknown token: " + word + " doesn't exist in vocab.")); } return std::make_shared(vocab, default_id); })); @@ -868,6 +892,58 @@ void bindGraphData(py::module *m) { }); } +void bindRandomTransformTensorOps(py::module *m) { + (void)py::class_>(*m, "ComposeOp") + .def(py::init([](const py::list &ops) { + std::vector> t_ops; + THROW_IF_ERROR(PyListToTensorOps(ops, &t_ops)); + return std::make_shared(t_ops); + })); + (void)py::class_>(*m, "RandomChoiceOp") + .def(py::init([](const py::list &ops) { + std::vector> t_ops; + THROW_IF_ERROR(PyListToTensorOps(ops, &t_ops)); + return std::make_shared(t_ops); + })); + (void)py::class_>(*m, "RandomApplyOp") + .def(py::init([](double prob, const py::list &ops) { + std::vector> t_ops; + THROW_IF_ERROR(PyListToTensorOps(ops, &t_ops)); + if (prob < 0 || prob > 1) { + THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, "prob needs to be within [0,1].")); + } + return std::make_shared(prob, t_ops); + })); + (void)py::class_>( + *m, "RandomSelectSubpolicyOp") + .def(py::init([](const py::list &py_policy) { + std::vector cpp_policy; + for (auto &py_sub : py_policy) { + cpp_policy.push_back({}); + for (auto handle : py_sub.cast()) { + py::tuple tp = handle.cast(); + if (tp.is_none() || tp.size() != 2) { + THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, "Each tuple in subpolicy should be (op, prob).")); + } + std::shared_ptr t_op; + if (py::isinstance(tp[0])) { + t_op = (tp[0]).cast>(); + } else if (py::isinstance(tp[0])) { + t_op = std::make_shared((tp[0]).cast()); + } else { + THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, "op is neither a tensorOp nor a pyfunc.")); + } + double prob = (tp[1]).cast(); + if (prob < 0 || prob > 1) { + THROW_IF_ERROR(Status(StatusCode::kUnexpectedError, "prob needs to be with [0,1].")); + } + cpp_policy.back().emplace_back(std::make_pair(t_op, prob)); + } + } + return std::make_shared(cpp_policy); + })); +} + // This is where we externalize the C logic as python modules PYBIND11_MODULE(_c_dataengine, m) { m.doc() = "pybind11 for _c_dataengine"; @@ -949,6 +1025,7 @@ PYBIND11_MODULE(_c_dataengine, m) { bindVocabObjects(&m); bindGraphData(&m); bindDependIcuTokenizerOps(&m); + bindRandomTransformTensorOps(&m); } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt index 8a9096ff23d..0d072d1d854 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/CMakeLists.txt @@ -4,11 +4,16 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) if (ENABLE_PYTHON) add_library(kernels OBJECT + compose_op.cc + random_apply_op.cc + random_choice_op.cc py_func_op.cc tensor_op.cc) target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS}) else() add_library(kernels OBJECT + compose_op.cc + random_apply_op.cc + random_choice_op.cc tensor_op.cc) endif() - diff --git a/mindspore/ccsrc/minddata/dataset/kernels/compose_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/compose_op.cc new file mode 100644 index 00000000000..35128d3e886 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/compose_op.cc @@ -0,0 +1,66 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/kernels/compose_op.h" + +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/py_func_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +Status ComposeOp::OutputShape(const std::vector &inputs, std::vector &outputs) { + std::vector in_shapes = inputs; + for (auto &op : ops_) { + RETURN_IF_NOT_OK(op->OutputShape(in_shapes, outputs)); + in_shapes = std::move(outputs); // outputs become empty after move + } + outputs = std::move(in_shapes); + return Status::OK(); +} +Status ComposeOp::OutputType(const std::vector &inputs, std::vector &outputs) { + std::vector in_types = inputs; + for (auto &op : ops_) { + RETURN_IF_NOT_OK(op->OutputType(in_types, outputs)); + in_types = std::move(outputs); // outputs become empty after move + } + outputs = std::move(in_types); + return Status::OK(); +} +Status ComposeOp::Compute(const TensorRow &inputs, TensorRow *outputs) { + IO_CHECK_VECTOR(inputs, outputs); + TensorRow in_rows = inputs; + for (auto &op : ops_) { + RETURN_IF_NOT_OK(op->Compute(in_rows, outputs)); + in_rows = std::move(*outputs); // after move, *outputs become empty + } + (*outputs) = std::move(in_rows); + return Status::OK(); +} + +ComposeOp::ComposeOp(const std::vector> &ops) : ops_(ops) { + if (ops_.empty()) { + MS_LOG(ERROR) << "op_list is empty this might lead to Segmentation Fault."; + } else if (ops_.size() == 1) { + MS_LOG(WARNING) << "op_list has only 1 op. Compose is probably not needed."; + } +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/compose_op.h b/mindspore/ccsrc/minddata/dataset/kernels/compose_op.h new file mode 100644 index 00000000000..3f0d994a7ec --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/compose_op.h @@ -0,0 +1,70 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_KERNELS_COMPOSE_OP_ +#define DATASET_KERNELS_COMPOSE_OP_ + +#include +#include +#include +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" + +namespace mindspore { +namespace dataset { +class ComposeOp : public TensorOp { + public: + /// constructor + /// \param[in] ops list of TensorOps to compose into 1 TensorOp + explicit ComposeOp(const std::vector> &ops); + + /// default destructor + ~ComposeOp() override = default; + + /// return the number of inputs the first tensorOp in compose takes + /// \return number of input tensors + uint32_t NumInput() override { return ops_.front()->NumInput(); } + + /// return the number of outputs the last tensorOp in compose produces + /// \return number of output tensors + uint32_t NumOutput() override { return ops_.back()->NumOutput(); } + + /// \param[in] inputs + /// \param[out] outputs + /// \return Status code + Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + + /// \param[in] inputs + /// \param[out] outputs + /// \return Status code + Status OutputType(const std::vector &inputs, std::vector &outputs) override; + + /// \param[in] input + /// \param[out] output + /// \return Status code + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kComposeOp; } + + private: + std::vector> ops_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_COMPOSE_OP_ diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt index c0c575de9af..bfc27a920bd 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt @@ -19,6 +19,7 @@ add_library(kernels-image OBJECT bounding_box_augment_op.cc random_resize_op.cc random_rotation_op.cc + random_select_subpolicy_op.cc random_vertical_flip_op.cc random_vertical_flip_with_bbox_op.cc rescale_op.cc diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_select_subpolicy_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_select_subpolicy_op.cc new file mode 100644 index 00000000000..3a789ab3444 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_select_subpolicy_op.cc @@ -0,0 +1,96 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/kernels/image/random_select_subpolicy_op.h" + +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +Status RandomSelectSubpolicyOp::Compute(const TensorRow &input, TensorRow *output) { + TensorRow in_row = input; + size_t rand_num = rand_int_(gen_); + CHECK_FAIL_RETURN_UNEXPECTED(rand_num < policy_.size(), "invalid rand_num:" + std::to_string(rand_num)); + for (auto &sub : policy_[rand_num]) { + if (rand_double_(gen_) <= sub.second) { + RETURN_IF_NOT_OK(sub.first->Compute(in_row, output)); + in_row = std::move(*output); + } + } + *output = std::move(in_row); + return Status::OK(); +} + +uint32_t RandomSelectSubpolicyOp::NumInput() { + uint32_t num_in = policy_.front().front().first->NumInput(); + for (auto &sub : policy_) { + for (auto p : sub) { + if (num_in != p.first->NumInput()) { + MS_LOG(WARNING) << "Unable to determine numInput."; + return 0; + } + } + } + return num_in; +} + +uint32_t RandomSelectSubpolicyOp::NumOutput() { + uint32_t num_out = policy_.front().front().first->NumOutput(); + for (auto &sub : policy_) { + for (auto p : sub) { + if (num_out != p.first->NumOutput()) { + MS_LOG(WARNING) << "Unable to determine numInput."; + return 0; + } + } + } + return num_out; +} + +Status RandomSelectSubpolicyOp::OutputShape(const std::vector &inputs, std::vector &outputs) { + outputs.clear(); + outputs.resize(NumOutput(), TensorShape::CreateUnknownRankShape()); + return Status::OK(); +} + +Status RandomSelectSubpolicyOp::OutputType(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(policy_.front().front().first->OutputType(inputs, outputs)); + for (auto &sub : policy_) { + for (auto p : sub) { + std::vector tmp_types; + RETURN_IF_NOT_OK(p.first->OutputType(inputs, tmp_types)); + if (outputs != tmp_types) { + outputs.clear(); + outputs.resize(NumOutput(), DataType(DataType::DE_UNKNOWN)); + return Status::OK(); + } + } + } + return Status::OK(); +} +RandomSelectSubpolicyOp::RandomSelectSubpolicyOp(const std::vector &policy) + : gen_(GetSeed()), policy_(policy), rand_int_(0, policy.size() - 1), rand_double_(0, 1) { + if (policy_.empty()) { + MS_LOG(ERROR) << "policy in RandomSelectSubpolicyOp is empty."; + } +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_select_subpolicy_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/random_select_subpolicy_op.h new file mode 100644 index 00000000000..55482a9818a --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_select_subpolicy_op.h @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_KERNELS_IMAGE_RANDOM_SELECT_SUBPOLICY_OP_ +#define DATASET_KERNELS_IMAGE_RANDOM_SELECT_SUBPOLICY_OP_ + +#include +#include +#include +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" + +namespace mindspore { +namespace dataset { + +using Subpolicy = std::vector, double>>; + +class RandomSelectSubpolicyOp : public TensorOp { + public: + /// constructor + /// \param[in] policy policy to choose subpolicy from + explicit RandomSelectSubpolicyOp(const std::vector &policy); + + /// destructor + ~RandomSelectSubpolicyOp() override = default; + + /// return number of input tensors + /// \return number of inputs if all ops in policy have the same NumInput, otherwise return 0 + uint32_t NumInput() override; + + /// return number of output tensors + /// \return number of outputs if all ops in policy have the same NumOutput, otherwise return 0 + uint32_t NumOutput() override; + + /// return unknown shapes + /// \param[in] inputs + /// \param[out] outputs + /// \return Status Code + Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + + /// return output type if all ops in policy return the same type, otherwise return unknown type + /// \param[in] inputs + /// \param[out] outputs + /// \return Status Code + Status OutputType(const std::vector &inputs, std::vector &outputs) override; + + /// \param[in] input + /// \param[out] output + /// \return Status code + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kRandomSelectSubpolicyOp; } + + private: + std::vector policy_; + std::mt19937 gen_; // mersenne_twister_engine + std::uniform_int_distribution rand_int_; + std::uniform_real_distribution rand_double_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_IMAGE_RANDOM_SELECT_SUBPOLICY_OP_ diff --git a/mindspore/ccsrc/minddata/dataset/kernels/random_apply_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/random_apply_op.cc new file mode 100644 index 00000000000..783d5077ccb --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/random_apply_op.cc @@ -0,0 +1,68 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/kernels/random_apply_op.h" + +#include +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +uint32_t RandomApplyOp::NumOutput() { + if (compose_->NumOutput() != NumInput()) { + MS_LOG(WARNING) << "NumOutput!=NumInput (randomApply would randomly affect number of outputs)."; + return 0; + } + return compose_->NumOutput(); +} + +Status RandomApplyOp::OutputShape(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(compose_->OutputShape(inputs, outputs)); + // randomApply either runs all ops or do nothing. If the two methods don't give the same result. return unknown shape. + if (inputs != outputs) { // when RandomApply is not applied, input should be the same as output + outputs.clear(); + outputs.resize(NumOutput(), TensorShape::CreateUnknownRankShape()); + } + return Status::OK(); +} +Status RandomApplyOp::OutputType(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(compose_->OutputType(inputs, outputs)); + if (inputs != outputs) { // when RandomApply is not applied, input should be the same as output + outputs.clear(); + outputs.resize(NumOutput(), DataType(DataType::DE_UNKNOWN)); + } + return Status::OK(); +} +Status RandomApplyOp::Compute(const TensorRow &input, TensorRow *output) { + if (rand_double_(gen_) <= prob_) { + RETURN_IF_NOT_OK(compose_->Compute(input, output)); + } else { + IO_CHECK_VECTOR(input, output); + *output = input; // copy over the tensors + } + return Status::OK(); +} +RandomApplyOp::RandomApplyOp(double prob, const std::vector> &ops) + : prob_(prob), gen_(GetSeed()), rand_double_(0, 1) { + compose_ = std::make_unique(ops); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/random_apply_op.h b/mindspore/ccsrc/minddata/dataset/kernels/random_apply_op.h new file mode 100644 index 00000000000..117b95a22e8 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/random_apply_op.h @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_KERNELS_RANDOM_APPLY_OP_ +#define DATASET_KERNELS_RANDOM_APPLY_OP_ + +#include +#include +#include +#include +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/compose_op.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/random.h" + +namespace mindspore { +namespace dataset { +class RandomApplyOp : public TensorOp { + public: + /// constructor + /// \param[in] prob probability whether the list of TensorOps will be applied + /// \param[in] ops the list of TensorOps to apply with prob likelihood + explicit RandomApplyOp(double prob, const std::vector> &ops); + + /// default destructor + ~RandomApplyOp() = default; + + /// return the number of inputs the first tensorOp in compose takes + /// \return number of input tensors + uint32_t NumInput() override { return compose_->NumInput(); } + + /// return the number of outputs + /// \return number of output tensors + uint32_t NumOutput() override; + + /// return output shape if randomApply won't affect the output shape, otherwise return unknown shape + /// \param[in] inputs + /// \param[out] outputs + /// \return Status code + Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + + /// return output type if randomApply won't affect the output type, otherwise return unknown type + /// \param[in] inputs + /// \param[out] outputs + /// \return Status code + Status OutputType(const std::vector &inputs, std::vector &outputs) override; + + /// \param[in] input + /// \param[out] output + /// \return Status code + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kRandomApplyOp; } + + private: + double prob_; + std::shared_ptr compose_; + std::mt19937 gen_; // mersenne_twister_engine + std::uniform_real_distribution rand_double_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_RANDOM_APPLY_OP_ diff --git a/mindspore/ccsrc/minddata/dataset/kernels/random_choice_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/random_choice_op.cc new file mode 100644 index 00000000000..fc81e85741f --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/random_choice_op.cc @@ -0,0 +1,97 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "minddata/dataset/kernels/random_choice_op.h" + +#include +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { + +uint32_t RandomChoiceOp::NumInput() { + uint32_t num_input = ops_.front()->NumInput(); + for (auto &op : ops_) { + uint32_t cur_num = op->NumInput(); + if (num_input != cur_num && cur_num > 0) { + MS_LOG(WARNING) << "Unable to determine NumInput, ops in RandomChoice don't take the same number of input."; + return 0; + } + } + return num_input; +} + +uint32_t RandomChoiceOp::NumOutput() { + uint32_t num_output = ops_.front()->NumOutput(); + for (auto &op : ops_) { + uint32_t cur_num = op->NumOutput(); + if (num_output != cur_num) { + MS_LOG(WARNING) << "Unable to determine NumInput, ops in RandomChoice don't have the same number of input."; + return 0; + } + } + return num_output; +} + +Status RandomChoiceOp::OutputShape(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(ops_.front()->OutputShape(inputs, outputs)); + for (auto &op : ops_) { + std::vector out_shapes; + RETURN_IF_NOT_OK(op->OutputShape(inputs, out_shapes)); + if (outputs != out_shapes) { + MS_LOG(WARNING) << "TensorOp in RandomChoice don't return the same tensorShape."; + outputs.clear(); + outputs.resize(NumOutput(), TensorShape::CreateUnknownRankShape()); + return Status::OK(); + } + } + return Status::OK(); +} + +Status RandomChoiceOp::OutputType(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(ops_.front()->OutputType(inputs, outputs)); + for (auto &op : ops_) { + std::vector out_types; + RETURN_IF_NOT_OK(op->OutputType(inputs, out_types)); + if (outputs != out_types) { + MS_LOG(WARNING) << "TensorOp in RandomChoice don't return the same tensorType."; + outputs.clear(); + outputs.resize(NumOutput(), DataType(DataType::DE_UNKNOWN)); + return Status::OK(); + } + } + return Status::OK(); +} + +Status RandomChoiceOp::Compute(const TensorRow &input, TensorRow *output) { + size_t rand_num = rand_int_(gen_); + CHECK_FAIL_RETURN_UNEXPECTED(rand_num < ops_.size(), "invalid rand_num:" + std::to_string(rand_num)); + RETURN_IF_NOT_OK(ops_[rand_num]->Compute(input, output)); + return Status::OK(); +} +RandomChoiceOp::RandomChoiceOp(const std::vector> &ops) + : ops_(ops), gen_(GetSeed()), rand_int_(0, ops.size() - 1) { + if (ops_.empty()) { + MS_LOG(ERROR) << "op_list in RandomChoiceOp is empty."; + } else if (ops_.size() == 1) { + MS_LOG(WARNING) << "op_list has only 1 op, this op would be picked every time."; + } +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/random_choice_op.h b/mindspore/ccsrc/minddata/dataset/kernels/random_choice_op.h new file mode 100644 index 00000000000..f952046837e --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/random_choice_op.h @@ -0,0 +1,77 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DATASET_KERNELS_RANDOM_CHOICE_OP_ +#define DATASET_KERNELS_RANDOM_CHOICE_OP_ + +#include +#include +#include +#include +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/kernels/compose_op.h" +#include "minddata/dataset/util/random.h" + +namespace mindspore { +namespace dataset { +class RandomChoiceOp : public TensorOp { + public: + /// constructor + /// \param[in] ops list of TensorOps to randomly choose 1 from + explicit RandomChoiceOp(const std::vector> &ops); + + /// default destructor + ~RandomChoiceOp() = default; + + /// return the number of inputs. All op in ops_ should have the same number of inputs + /// \return number of input tensors + uint32_t NumInput() override; + + /// return the number of outputs. All op in ops_ should have the same number of outputs + /// \return number of input tensors + uint32_t NumOutput() override; + + /// return output shape if all ops in ops_ return the same shape, otherwise return unknown shape + /// \param[in] inputs + /// \param[out] outputs + /// \return Status code + Status OutputShape(const std::vector &inputs, std::vector &outputs) override; + + /// return output type if all ops in ops_ return the same type, otherwise return unknown type + /// \param[in] inputs + /// \param[out] outputs + /// \return Status code + Status OutputType(const std::vector &inputs, std::vector &outputs) override; + + /// \param[in] input + /// \param[out] output + /// \return Status code + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kRandomChoiceOp; } + + private: + std::vector> ops_; + std::mt19937 gen_; // mersenne_twister_engine + std::uniform_int_distribution rand_int_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // DATASET_KERNELS_RANDOM_CHOICE_OP_ diff --git a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h index 3bcba4b4630..00b4fa5efb3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h @@ -129,6 +129,10 @@ constexpr char kUnicodeCharTokenizerOp[] = "UnicodeCharTokenizerOp"; constexpr char kUnicodeScriptTokenizerOp[] = "UnicodeScriptTokenizerOp"; constexpr char kWhitespaceTokenizerOp[] = "WhitespaceTokenizerOp"; constexpr char kWordpieceTokenizerOp[] = "WordpieceTokenizerOp"; +constexpr char kRandomChoiceOp[] = "RandomChoiceOp"; +constexpr char kRandomApplyOp[] = "RandomApplyOp"; +constexpr char kComposeOp[] = "ComposeOp"; +constexpr char kRandomSelectSubpolicyOp[] = "RandomSelectSubpolicyOp"; // data constexpr char kConcatenateOp[] = "kConcatenateOp"; diff --git a/mindspore/dataset/core/validator_helpers.py b/mindspore/dataset/core/validator_helpers.py index 8806babd639..1ded33a9f98 100644 --- a/mindspore/dataset/core/validator_helpers.py +++ b/mindspore/dataset/core/validator_helpers.py @@ -19,6 +19,8 @@ import inspect from multiprocessing import cpu_count import os import numpy as np + +import mindspore._c_dataengine as cde from ..engine import samplers # POS_INT_MIN is used to limit values from starting from 0 @@ -358,3 +360,9 @@ def check_gnn_list_or_ndarray(param, param_name): if not param.dtype == np.int32: raise TypeError("Each member in {0} should be of type int32. Got {1}.".format( param_name, param.dtype)) + + +def check_tensor_op(param, param_name): + """check whether param is a tensor op or a callable python function""" + if not isinstance(param, cde.TensorOp) and not callable(param): + raise TypeError("{0} is not a c_transform op (TensorOp) nor a callable pyfunc.".format(param_name)) diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 62496822e51..a2f9f5eee1e 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -22,7 +22,7 @@ import mindspore.common.dtype as mstype import mindspore._c_dataengine as cde from .validators import check_num_classes, check_de_type, check_fill_value, check_slice_op, check_mask_op, \ - check_pad_end, check_concat_type + check_pad_end, check_concat_type, check_random_transform_ops from ..core.datatypes import mstype_to_detype @@ -82,7 +82,7 @@ class Slice(cde.SliceOp): Maximum `n` number of arguments to slice a tensor of rank `n`. One object in slices can be one of: 1. :py:obj:`int`: Slice this index only. Negative index is supported. - 2. :py:obj:`list(int)`: Slice these indices ion the list only. Negative indices are supdeported. + 2. :py:obj:`list(int)`: Slice these indices ion the list only. Negative indices are supported. 3. :py:obj:`slice`: Slice the generated indices from the slice object. Similar to `start:stop:step`. 4. :py:obj:`None`: Slice the whole dimension. Similar to `:` in python indexing. 5. :py:obj:`Ellipses`: Slice all dimensions between the two slices. Similar to `...` in python indexing. @@ -232,3 +232,50 @@ class Duplicate(cde.DuplicateOp): >>> # | [1,2,3] | [1,2,3] | >>> # +---------+---------+ """ + + +class Compose(cde.ComposeOp): + """ + Compose a list of transforms into a single transform. + + Args: + transforms (list): List of transformations to be applied. + Example: + >>> compose = Compose([vision.Decode(), vision.RandomCrop()]) + >>> dataset = ds.map(operations=compose) + """ + + @check_random_transform_ops + def __init__(self, op_list): + super().__init__(op_list) + + +class RandomApply(cde.RandomApplyOp): + """ + Randomly performs a series of transforms with a given probability. + Args: + transforms (list): List of transformations to be applied. + prob (float, optional): The probability to apply the transformation list (default=0.5) + Example: + >>> rand_apply = RandomApply([vision.RandomCrop()]) + >>> dataset = ds.map(operations=rand_apply) + """ + + @check_random_transform_ops + def __init__(self, op_list, prob=0.5): + super().__init__(prob, op_list) + + +class RandomChoice(cde.RandomChoiceOp): + """ + Randomly selects one transform from a list of transforms to perform operation. + Args: + transforms (list): List of transformations to be chosen from to apply. + Example: + >>> rand_choice = RandomChoice([vision.CenterCrop(), vision.RandomCrop()]) + >>> dataset = ds.map(operations=rand_choice) + """ + + @check_random_transform_ops + def __init__(self, op_list): + super().__init__(op_list) diff --git a/mindspore/dataset/transforms/validators.py b/mindspore/dataset/transforms/validators.py index 9fe0fa5f106..f44fd918eea 100644 --- a/mindspore/dataset/transforms/validators.py +++ b/mindspore/dataset/transforms/validators.py @@ -18,7 +18,8 @@ from functools import wraps import numpy as np from mindspore._c_expression import typing -from ..core.validator_helpers import parse_user_args, type_check, check_pos_int64, check_value, check_positive +from ..core.validator_helpers import parse_user_args, type_check, check_pos_int64, check_value, check_positive, \ + check_tensor_op # POS_INT_MIN is used to limit values from starting from 0 POS_INT_MIN = 1 @@ -180,3 +181,22 @@ def check_concat_type(method): return method(self, *args, **kwargs) return new_method + + +def check_random_transform_ops(method): + """Wrapper method to check the parameters of RandomChoice, RandomApply and Compose.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + arg_list, _ = parse_user_args(method, *args, **kwargs) + type_check(arg_list[0], (list,), "op_list") + if not arg_list[0]: + raise ValueError("op_list can not be empty.") + for ind, op in enumerate(arg_list[0]): + check_tensor_op(op, "op_list[{0}]".format(ind)) + if len(arg_list) == 2: # random apply takes an additional arg + type_check(arg_list[1], (float, int), "prob") + check_value(arg_list[1], (0, 1), "prob") + return method(self, *args, **kwargs) + + return new_method diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py index 8e3b7c72141..2de575d14d6 100644 --- a/mindspore/dataset/transforms/vision/c_transforms.py +++ b/mindspore/dataset/transforms/vision/c_transforms.py @@ -47,7 +47,7 @@ from .utils import Inter, Border from .validators import check_prob, check_crop, check_resize_interpolation, check_random_resize_crop, \ check_normalize_c, check_random_crop, check_random_color_adjust, check_random_rotation, check_range, \ check_resize, check_rescale, check_pad, check_cutout, check_uniform_augment_cpp, check_bounding_box_augment_cpp, \ - FLOAT_MAX_INTEGER + check_random_select_subpolicy_op, FLOAT_MAX_INTEGER DE_C_INTER_MODE = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR, Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR, @@ -712,3 +712,9 @@ class UniformAugment(cde.UniformAugOp): self.operations = operations self.num_ops = num_ops super().__init__(operations, num_ops) + + +class RandomSelectSubpolicy(cde.RandomSelectSubpolicyOp): + @check_random_select_subpolicy_op + def __init__(self, policy): + super().__init__(policy) diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py index 4cb66133592..0f2bc2ce2e3 100644 --- a/mindspore/dataset/transforms/vision/validators.py +++ b/mindspore/dataset/transforms/vision/validators.py @@ -21,7 +21,7 @@ from mindspore._c_dataengine import TensorOp from .utils import Inter, Border from ...core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \ - check_2tuple, check_range, check_positive, INT32_MAX, parse_user_args, type_check, type_check_list + check_2tuple, check_range, check_positive, INT32_MAX, parse_user_args, type_check, type_check_list, check_tensor_op def check_crop_size(size): @@ -588,3 +588,26 @@ def check_compose_list(method): return method(self, *args, **kwargs) return new_method + + +def check_random_select_subpolicy_op(method): + """Wrapper method to check the parameters of RandomSelectSubpolicyOp.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [policy], _ = parse_user_args(method, *args, **kwargs) + type_check(policy, (list,), "policy") + if not policy: + raise ValueError("policy can not be empty.") + for sub_ind, sub in enumerate(policy): + type_check(sub, (list,), "policy[{0}]".format([sub_ind])) + if not sub: + raise ValueError("policy[{0}] can not be empty.".format(sub_ind)) + for op_ind, tp in enumerate(sub): + check_2tuple(tp, "policy[{0}][{1}]".format(sub_ind, op_ind)) + check_tensor_op(tp[0], "op of (op, prob) in policy[{0}][{1}]".format(sub_ind, op_ind)) + check_value(tp[1], (0, 1), "prob of (op, prob) policy[{0}][{1}]".format(sub_ind, op_ind)) + + return method(self, *args, **kwargs) + + return new_method diff --git a/tests/ut/python/dataset/test_c_compose.py b/tests/ut/python/dataset/test_c_compose.py new file mode 100644 index 00000000000..906d787f219 --- /dev/null +++ b/tests/ut/python/dataset/test_c_compose.py @@ -0,0 +1,50 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as ops +import mindspore.dataset.transforms.py_transforms as py_ops + + +def test_compose(): + ds.config.set_seed(0) + + def test_config(arr, op_list): + try: + data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) + data = data.map(input_columns=["col"], operations=ops.Compose(op_list)) + res = [] + for i in data.create_dict_iterator(): + res.append(i["col"].tolist()) + return res + except (TypeError, ValueError) as e: + return str(e) + + # test simple compose with only 1 op, this would generate a warning + assert test_config([[1, 0], [3, 4]], [ops.Fill(2)]) == [[2, 2], [2, 2]] + # test 1 column -> 2columns -> 1 -> 2 -> 1 + assert test_config([[1, 0]], [ops.Duplicate(), ops.Concatenate(), ops.Duplicate(), ops.Concatenate()]) == [ + [1, 0] * 4] + # test one python transform followed by a C transform. type after oneHot is float (mixed use-case) + assert test_config([1, 0], [py_ops.OneHotOp(2), ops.TypeCast(mstype.int32)]) == [[[0, 1]], [[1, 0]]] + # test exceptions. compose, randomApply randomChoice use the same validator + assert "op_list[0] is not a c_transform op" in test_config([1, 0], [1, ops.TypeCast(mstype.int32)]) + # test empty op list + assert "op_list can not be empty." in test_config([1, 0], []) + + +if __name__ == "__main__": + test_compose() diff --git a/tests/ut/python/dataset/test_c_random_apply.py b/tests/ut/python/dataset/test_c_random_apply.py new file mode 100644 index 00000000000..8b4851aab57 --- /dev/null +++ b/tests/ut/python/dataset/test_c_random_apply.py @@ -0,0 +1,48 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as ops + + +def test_random_apply(): + ds.config.set_seed(0) + + def test_config(arr, op_list, prob=0.5): + try: + data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) + data = data.map(input_columns=["col"], operations=ops.RandomApply(op_list, prob)) + res = [] + for i in data.create_dict_iterator(): + res.append(i["col"].tolist()) + return res + except (TypeError, ValueError) as e: + return str(e) + + res1 = test_config([[0, 1]], [ops.Duplicate(), ops.Concatenate()]) + assert res1 in [[[0, 1]], [[0, 1, 0, 1]]] + # test single nested compose + assert test_config([[0, 1, 2]], [ops.Compose([ops.Duplicate(), ops.Concatenate(), ops.Slice([0, 1, 2])])]) == [ + [0, 1, 2]] + # test exception + assert "is not of type (" in test_config([1, 0], ops.TypeCast(mstype.int32)) + assert "Input prob is not within the required interval" in test_config([0, 1], [ops.Slice([0, 1])], 1.1) + assert "is not of type (" in test_config([1, 0], [ops.TypeCast(mstype.int32)], None) + assert "op_list with value None is not of type (" in test_config([1, 0], None) + + +if __name__ == "__main__": + test_random_apply() diff --git a/tests/ut/python/dataset/test_c_random_choice.py b/tests/ut/python/dataset/test_c_random_choice.py new file mode 100644 index 00000000000..3faedeb26e0 --- /dev/null +++ b/tests/ut/python/dataset/test_c_random_choice.py @@ -0,0 +1,48 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as ops + + +def test_random_choice(): + ds.config.set_seed(0) + + def test_config(arr, op_list): + try: + data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) + data = data.map(input_columns=["col"], operations=ops.RandomChoice(op_list)) + res = [] + for i in data.create_dict_iterator(): + res.append(i["col"].tolist()) + return res + except (TypeError, ValueError) as e: + return str(e) + + # test whether a op would be randomly chosen. In order to prevent random failure, both results need to be checked + res1 = test_config([[0, 1, 2]], [ops.PadEnd([4], 0), ops.Slice([0, 2])]) + assert res1 in [[[0, 1, 2, 0]], [[0, 2]]] + + # test nested structure + res2 = test_config([[0, 1, 2]], [ops.Compose([ops.Duplicate(), ops.Concatenate()]), + ops.Compose([ops.Slice([0, 1]), ops.OneHot(2)])]) + assert res2 in [[[[1, 0], [0, 1]]], [[0, 1, 2, 0, 1, 2]]] + # test random_choice where there is only 1 op + assert test_config([[4, 3], [2, 1]], [ops.Slice([0])]) == [[4], [2]] + + +if __name__ == "__main__": + test_random_choice() diff --git a/tests/ut/python/dataset/test_random_select_subpolicy.py b/tests/ut/python/dataset/test_random_select_subpolicy.py new file mode 100644 index 00000000000..4248f9d0488 --- /dev/null +++ b/tests/ut/python/dataset/test_random_select_subpolicy.py @@ -0,0 +1,51 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as ops +import mindspore.dataset.transforms.vision.c_transforms as visions + + +def test_random_select_subpolicy(): + ds.config.set_seed(0) + + def test_config(arr, policy): + try: + data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) + data = data.map(input_columns=["col"], operations=visions.RandomSelectSubpolicy(policy)) + res = [] + for i in data.create_dict_iterator(): + res.append(i["col"].tolist()) + return res + except (TypeError, ValueError) as e: + return str(e) + + # 3 possible outcomes + policy1 = [[(ops.PadEnd([4], 0), 0.5), (ops.Compose([ops.Duplicate(), ops.Concatenate()]), 1)], + [(ops.Slice([0, 1]), 0.5), (ops.Duplicate(), 1), (ops.Concatenate(), 1)]] + res1 = test_config([[1, 2, 3]], policy1) + assert res1 in [[[1, 2, 1, 2]], [[1, 2, 3, 1, 2, 3]], [[1, 2, 3, 0, 1, 2, 3, 0]]] + + # test exceptions + assert "policy can not be empty." in test_config([[1, 2, 3]], []) + assert "policy[0] can not be empty." in test_config([[1, 2, 3]], [[]]) + assert "op of (op, prob) in policy[1][0] is not a c_transform op (TensorOp) nor a callable pyfunc" in test_config( + [[1, 2, 3]], [[(ops.PadEnd([4], 0), 0.5)], [(1, 0.4)]]) + assert "prob of (op, prob) policy[1][0] is not within the required interval of (0 to 1)" in test_config([[1]], [ + [(ops.Duplicate(), 0)], [(ops.Duplicate(), -0.1)]]) + + +if __name__ == "__main__": + test_random_select_subpolicy()