From 83aaed09bbeced4281b63aa8ced901a3b96d2c1a Mon Sep 17 00:00:00 2001
From: alex-yuyue
Date: Wed, 7 Apr 2021 15:56:13 -0400
Subject: [PATCH] TensorOp decoupling stage 3 (transform ops)

Signed-off-by: alex-yuyue
---
 .../bindings/dataset/kernels/data/bindings.cc |  69 ------
 .../bindings/dataset/kernels/ir/bindings.cc   |  88 +++++++
 .../ccsrc/minddata/dataset/api/transforms.cc  |  73 ++++++
 .../minddata/dataset/core/tensor_helpers.h    |  34 +--
 .../minddata/dataset/include/constants.h      |  10 +
 .../minddata/dataset/include/transforms.h     | 144 +++++++++++
 .../dataset/kernels/data/data_utils.h         |   9 -
 .../dataset/kernels/ir/data/transforms_ir.cc  |  75 ++++++
 .../dataset/kernels/ir/data/transforms_ir.h   |  75 ++++++
 mindspore/dataset/transforms/c_transforms.py  |  41 +--
 tests/ut/cpp/dataset/c_api_transforms_test.cc | 233 ++++++++++++++++++
 11 files changed, 724 insertions(+), 127 deletions(-)

diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc
index 32fd3d98d93..7b3f6d33d3f 100644
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/data/bindings.cc
@@ -18,91 +18,22 @@
 #include "pybind11/stl_bind.h"
 
 #include "minddata/dataset/api/python/pybind_register.h"
-#include "minddata/dataset/core/tensor_helpers.h"
-#include "minddata/dataset/kernels/data/concatenate_op.h"
 #include "minddata/dataset/kernels/data/fill_op.h"
-#include "minddata/dataset/kernels/data/mask_op.h"
-#include "minddata/dataset/kernels/data/pad_end_op.h"
-#include "minddata/dataset/kernels/data/slice_op.h"
 #include "minddata/dataset/kernels/data/to_float16_op.h"
 
 namespace mindspore {
 namespace dataset {
 
-PYBIND_REGISTER(ConcatenateOp, 1, ([](const py::module *m) {
-                  (void)py::class_<ConcatenateOp, TensorOp, std::shared_ptr<ConcatenateOp>>(*m, "ConcatenateOp")
-                    .def(py::init<int8_t, std::shared_ptr<Tensor>, std::shared_ptr<Tensor>>());
-                }));
-
 PYBIND_REGISTER(
   FillOp, 1, ([](const py::module *m) {
     (void)py::class_<FillOp, TensorOp, std::shared_ptr<FillOp>>(*m, "FillOp").def(py::init<std::shared_ptr<Tensor>>());
   }));
 
-PYBIND_REGISTER(MaskOp, 1, ([](const py::module *m) {
-                  (void)py::class_<MaskOp, TensorOp, std::shared_ptr<MaskOp>>(*m, "MaskOp")
-                    .def(py::init<RelationalOp, std::shared_ptr<Tensor>, DataType>());
-                }));
-
-PYBIND_REGISTER(PadEndOp, 1, ([](const py::module *m) {
-                  (void)py::class_<PadEndOp, TensorOp, std::shared_ptr<PadEndOp>>(*m, "PadEndOp")
-                    .def(py::init<TensorShape, std::shared_ptr<Tensor>>());
-                }));
-
-PYBIND_REGISTER(SliceOption, 0, ([](const py::module *m) {
-                  (void)py::class_<SliceOption>(*m, "SliceOption")
-                    .def(py::init([](const py::slice &py_slice) {
-                      Slice c_slice;
-                      if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none() &&
-                          !py_slice.attr("step").is_none()) {
-                        c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
-                                        py::reinterpret_borrow<py::int_>(py_slice.attr("stop")),
-                                        py::reinterpret_borrow<py::int_>(py_slice.attr("step")));
-                      } else if (py_slice.attr("start").is_none() && py_slice.attr("step").is_none()) {
-                        c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
-                      } else if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none()) {
-                        c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
-                                        py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
-                      }
-
-                      if (!c_slice.valid()) {
-                        THROW_IF_ERROR(
-                          Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Wrong slice object"));
-                      }
-                      return SliceOption(c_slice);
-                    }))
-                    .def(py::init([](const py::list &py_list) {
-                      std::vector<dsize_t> indices;
-                      for (auto l : py_list) {
-                        indices.push_back(py::reinterpret_borrow<py::int_>(l));
-                      }
-                      return SliceOption(indices);
-                    }))
-                    .def(py::init<bool>())
-                    .def(py::init<SliceOption>());
-                }));
-
-PYBIND_REGISTER(SliceOp, 1, ([](const py::module *m) {
-                  (void)py::class_<SliceOp, TensorOp, std::shared_ptr<SliceOp>>(*m, "SliceOp")
-                    .def(py::init<std::vector<SliceOption>>());
-                }));
-
 PYBIND_REGISTER(ToFloat16Op, 1, ([](const py::module *m) {
                   (void)py::class_<ToFloat16Op, TensorOp, std::shared_ptr<ToFloat16Op>>(*m, "ToFloat16Op",
                                                                                         py::dynamic_attr())
                     .def(py::init<>());
                 }));
 
-PYBIND_REGISTER(RelationalOp, 0, ([](const py::module *m) {
-                  (void)py::enum_<RelationalOp>(*m, "RelationalOp", py::arithmetic())
-                    .value("EQ", RelationalOp::kEqual)
-                    .value("NE", RelationalOp::kNotEqual)
-                    .value("LT", RelationalOp::kLess)
-                    .value("LE", RelationalOp::kLessEqual)
-                    .value("GT", RelationalOp::kGreater)
-                    .value("GE", RelationalOp::kGreaterEqual)
-                    .export_values();
-                }));
-
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc
index 5d1b9a687d2..be8b28c842c 100644
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/bindings.cc
@@ -15,6 +15,7 @@
  */
 
 #include "pybind11/pybind11.h"
+#include "mindspore/ccsrc/minddata/dataset/include/transforms.h"
 
 #include "minddata/dataset/api/python/pybind_register.h"
 #include "minddata/dataset/core/global_context.h"
@@ -64,6 +65,16 @@ PYBIND_REGISTER(
                     }));
                 }));
 
+PYBIND_REGISTER(ConcatenateOperation, 1, ([](const py::module *m) {
+                  (void)py::class_<transforms::ConcatenateOperation, TensorOperation,
+                                   std::shared_ptr<transforms::ConcatenateOperation>>(*m, "ConcatenateOperation")
+                    .def(py::init([](int8_t axis, std::shared_ptr<Tensor> prepend, std::shared_ptr<Tensor> append) {
+                      auto concatenate = std::make_shared<transforms::ConcatenateOperation>(axis, prepend, append);
+                      THROW_IF_ERROR(concatenate->ValidateParams());
+                      return concatenate;
+                    }));
+                }));
+
 PYBIND_REGISTER(
   DuplicateOperation, 1, ([](const py::module *m) {
     (void)py::class_<transforms::DuplicateOperation, TensorOperation,
                      std::shared_ptr<transforms::DuplicateOperation>>(*m, "DuplicateOperation")
       .def(py::init([]() {
         auto duplicate = std::make_shared<transforms::DuplicateOperation>();
         THROW_IF_ERROR(duplicate->ValidateParams());
         return duplicate;
       }));
   }));
 
@@ -75,6 +86,17 @@ PYBIND_REGISTER(
+PYBIND_REGISTER(MaskOperation, 1, ([](const py::module *m) {
+                  (void)
+                    py::class_<transforms::MaskOperation, TensorOperation,
+                               std::shared_ptr<transforms::MaskOperation>>(*m, "MaskOperation")
+                      .def(py::init([](RelationalOp op, std::shared_ptr<Tensor> constant, DataType dtype) {
+                        auto mask = std::make_shared<transforms::MaskOperation>(op, constant, dtype);
+                        THROW_IF_ERROR(mask->ValidateParams());
+                        return mask;
+                      }));
+                }));
+
 PYBIND_REGISTER(
   OneHotOperation, 1, ([](const py::module *m) {
     (void)py::class_<transforms::OneHotOperation, TensorOperation,
                      std::shared_ptr<transforms::OneHotOperation>>(*m, "OneHotOperation")
       .def(py::init([](int32_t num_classes) {
         auto one_hot = std::make_shared<transforms::OneHotOperation>(num_classes);
         THROW_IF_ERROR(one_hot->ValidateParams());
         return one_hot;
       }));
   }));
 
@@ -86,6 +108,17 @@ PYBIND_REGISTER(
+PYBIND_REGISTER(
+  PadEndOperation, 1, ([](const py::module *m) {
+    (void)py::class_<transforms::PadEndOperation, TensorOperation,
+                     std::shared_ptr<transforms::PadEndOperation>>(*m, "PadEndOperation")
+      .def(py::init([](TensorShape pad_shape, std::shared_ptr<Tensor> pad_value) {
+        auto pad_end = std::make_shared<transforms::PadEndOperation>(pad_shape, pad_value);
+        THROW_IF_ERROR(pad_end->ValidateParams());
+        return pad_end;
+      }));
+  }));
+
 PYBIND_REGISTER(RandomChoiceOperation, 1, ([](const py::module *m) {
                   (void)py::class_<transforms::RandomChoiceOperation, TensorOperation,
                                    std::shared_ptr<transforms::RandomChoiceOperation>>(*m, "RandomChoiceOperation")
@@ -110,6 +143,50 @@ PYBIND_REGISTER(RandomApplyOperation, 1, ([](const py::module *m) {
                     }));
                 }));
 
+PYBIND_REGISTER(
+  SliceOperation, 1, ([](const py::module *m) {
+    (void)py::class_<transforms::SliceOperation, TensorOperation,
+                     std::shared_ptr<transforms::SliceOperation>>(*m, "SliceOperation")
+      .def(py::init([](std::vector<SliceOption> slice_input) {
+        auto slice = std::make_shared<transforms::SliceOperation>(slice_input);
+        THROW_IF_ERROR(slice->ValidateParams());
+        return slice;
+      }));
+  }));
+
+PYBIND_REGISTER(SliceOption, 0, ([](const py::module *m) {
+                  (void)py::class_<SliceOption>(*m, "SliceOption")
+                    .def(py::init([](const py::slice &py_slice) {
+                      Slice c_slice;
+                      if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none() &&
+                          !py_slice.attr("step").is_none()) {
+                        c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
+                                        py::reinterpret_borrow<py::int_>(py_slice.attr("stop")),
+                                        py::reinterpret_borrow<py::int_>(py_slice.attr("step")));
+                      } else if (py_slice.attr("start").is_none() && py_slice.attr("step").is_none()) {
+                        c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
+                      } else if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none()) {
+                        c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
+                                        py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
+                      }
+
+                      if (!c_slice.valid()) {
+                        THROW_IF_ERROR(
+                          Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Wrong slice object"));
+                      }
+                      return SliceOption(c_slice);
+                    }))
+                    .def(py::init([](const py::list &py_list) {
+                      std::vector<dsize_t> indices;
+                      for (auto l : py_list) {
+                        indices.push_back(py::reinterpret_borrow<py::int_>(l));
+                      }
+                      return SliceOption(indices);
+                    }))
+                    .def(py::init<bool>())
+                    .def(py::init<SliceOption>());
+                }));
+
 PYBIND_REGISTER(
   TypeCastOperation, 1, ([](const py::module *m) {
     (void)py::class_<transforms::TypeCastOperation, TensorOperation,
                      std::shared_ptr<transforms::TypeCastOperation>>(*m, "TypeCastOperation")
       .def(py::init([](std::string data_type) {
         auto type_cast = std::make_shared<transforms::TypeCastOperation>(data_type);
         THROW_IF_ERROR(type_cast->ValidateParams());
         return type_cast;
       }));
   }));
 
@@ -132,5 +209,16 @@ PYBIND_REGISTER(
                     }));
                 }));
 
+PYBIND_REGISTER(RelationalOp, 0, ([](const py::module *m) {
+                  (void)py::enum_<RelationalOp>(*m, "RelationalOp", py::arithmetic())
+                    .value("EQ", RelationalOp::kEqual)
+                    .value("NE", RelationalOp::kNotEqual)
+                    .value("LT", RelationalOp::kLess)
+                    .value("LE", RelationalOp::kLessEqual)
+                    .value("GT", RelationalOp::kGreater)
+                    .value("GE", RelationalOp::kGreaterEqual)
+                    .export_values();
+                }));
+
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/api/transforms.cc b/mindspore/ccsrc/minddata/dataset/api/transforms.cc
index b357547ee00..d3a0280f404 100644
--- a/mindspore/ccsrc/minddata/dataset/api/transforms.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/transforms.cc
@@ -18,6 +18,7 @@
 
 #include <algorithm>
 
+#include "mindspore/ccsrc/minddata/dataset/core/type_id.h"
 #include "minddata/dataset/kernels/ir/data/transforms_ir.h"
 
 namespace mindspore {
@@ -56,11 +57,52 @@ Compose::Compose(const std::vector<std::shared_ptr<TensorTransform>> &transforms)
 
 std::shared_ptr<TensorOperation> Compose::Parse() { return std::make_shared<ComposeOperation>(data_->transforms_); }
 
+#ifndef ENABLE_ANDROID
+// Constructor to Concatenate
+struct Concatenate::Data {
+  explicit Data(int8_t axis, MSTensor prepend, MSTensor append) : axis_(axis), prepend_(prepend), append_(append) {}
+  int8_t axis_;
+  MSTensor prepend_;
+  MSTensor append_;
+};
+
+Concatenate::Concatenate(int8_t axis, MSTensor prepend, MSTensor append)
+    : data_(std::make_shared<Data>(axis, prepend, append)) {}
+
+std::shared_ptr<TensorOperation> Concatenate::Parse() {
+  std::shared_ptr<Tensor> out_prepend, out_append;
+  Tensor::CreateFromMSTensor(data_->prepend_, &out_prepend);
+  Tensor::CreateFromMSTensor(data_->append_, &out_append);
+  return std::make_shared<ConcatenateOperation>(data_->axis_, out_prepend, out_append);
+}
+#endif  // not ENABLE_ANDROID
+
 // Constructor to Duplicate
 Duplicate::Duplicate() {}
 
 std::shared_ptr<TensorOperation> Duplicate::Parse() { return std::make_shared<DuplicateOperation>(); }
 
+#ifndef ENABLE_ANDROID
+// Constructor to Mask
+struct Mask::Data {
+  explicit Data(RelationalOp op, MSTensor constant, mindspore::DataType ms_type)
+      : op_(op), constant_(constant), ms_type_(ms_type) {}
+  RelationalOp op_;
+  MSTensor constant_;
+  mindspore::DataType ms_type_;
+};
+
+Mask::Mask(RelationalOp op, MSTensor constant, mindspore::DataType ms_type)
+    : data_(std::make_shared<Data>(op, constant, ms_type)) {}
+
+std::shared_ptr<TensorOperation> Mask::Parse() {
+  std::shared_ptr<Tensor> out_constant;
+  Tensor::CreateFromMSTensor(data_->constant_, &out_constant);
+  DataType de_type = dataset::MSTypeToDEType(static_cast<TypeId>(data_->ms_type_));
+  return std::make_shared<MaskOperation>(data_->op_, out_constant, de_type);
+}
+#endif  // not ENABLE_ANDROID
+
 // Constructor to OneHot
 struct OneHot::Data {
   explicit Data(int32_t num_classes) : num_classes_(num_classes) {}
@@ -71,6 +113,25 @@ OneHot::OneHot(int32_t num_classes) : data_(std::make_shared<Data>(num_classes))
 
 std::shared_ptr<TensorOperation> OneHot::Parse() { return std::make_shared<OneHotOperation>(data_->num_classes_); }
 
+#ifndef ENABLE_ANDROID
+// Constructor to PadEnd
+struct PadEnd::Data {
+  explicit Data(const std::vector<dsize_t> &pad_shape, MSTensor pad_value)
+      : pad_shape_(pad_shape), pad_value_(pad_value) {}
+  std::vector<dsize_t> pad_shape_;
+  MSTensor pad_value_;
+};
+
+PadEnd::PadEnd(const std::vector<dsize_t> &pad_shape, MSTensor pad_value)
+    : data_(std::make_shared<Data>(pad_shape, pad_value)) {}
+
+std::shared_ptr<TensorOperation> PadEnd::Parse() {
+  std::shared_ptr<Tensor> pad_value;
+  Tensor::CreateFromMSTensor(data_->pad_value_, &pad_value);
+  return std::make_shared<PadEndOperation>(TensorShape(data_->pad_shape_), pad_value);
+}
+#endif  // not ENABLE_ANDROID
+
 // Constructor to RandomApply.
 struct RandomApply::Data {
   std::vector<std::shared_ptr<TensorOperation>> transforms_;
@@ -136,6 +197,18 @@ std::shared_ptr<TensorOperation> RandomChoice::Parse() {
   return std::make_shared<RandomChoiceOperation>(data_->transforms_);
 }
 
+#ifndef ENABLE_ANDROID
+// Constructor to Slice
+struct Slice::Data {
+  explicit Data(const std::vector<SliceOption> &slice_input) : slice_input_(slice_input) {}
+  std::vector<SliceOption> slice_input_;
+};
+
+Slice::Slice(const std::vector<SliceOption> &slice_input) : data_(std::make_shared<Data>(slice_input)) {}
+
+std::shared_ptr<TensorOperation> Slice::Parse() { return std::make_shared<SliceOperation>(data_->slice_input_); }
+#endif  // not ENABLE_ANDROID
+
 // Constructor to TypeCast
 struct TypeCast::Data {
   explicit Data(const std::vector<char> &data_type) : data_type_(CharToString(data_type)) {}
diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.h b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.h
index a0383b7c65f..18733caeb5c 100644
--- a/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.h
+++ b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.h
@@ -19,43 +19,11 @@
 
 #include <memory>
 #include <vector>
+#include "mindspore/ccsrc/minddata/dataset/include/transforms.h"
 #include "minddata/dataset/include/constants.h"
 
 namespace mindspore {
 namespace dataset {
-class Slice {
- public:
-  Slice() : start_(0), stop_(0), step_(0) {}
-  Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
-  Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
-  explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
-  Slice(Slice const &slice) = default;
-
-  ~Slice() = default;
-
-  bool valid() const { return step_ != 0; }
-  dsize_t start_;
-  dsize_t stop_;
-  dsize_t step_;
-};
-
-class SliceOption {
- public:
-  explicit SliceOption(bool all) : all_(all) {}
-  explicit SliceOption(std::vector<dsize_t> indices) : indices_(indices) {}
-  explicit SliceOption(Slice slice) : slice_(slice) {}
-  SliceOption(SliceOption const &slice) = default;
-
-  ~SliceOption() = default;
-
-  // only one of the following will be valid
-  // given indices to slice the Tensor.
-  std::vector<dsize_t> indices_ = {};
-  // Slice object. All start, stop and step are 0 if invalid.
-  Slice slice_;
-  bool all_ = false;
-};
-
 /// Recursive helper function to generate indices based on vector of SliceOptions. It recursively iterates through each
 /// range represented by slice_options to generate a list of indices to be sliced.
 /// \param[out] matrix Generated nested vector of indices
diff --git a/mindspore/ccsrc/minddata/dataset/include/constants.h b/mindspore/ccsrc/minddata/dataset/include/constants.h
index d2dfedac6e0..9c6752e0877 100644
--- a/mindspore/ccsrc/minddata/dataset/include/constants.h
+++ b/mindspore/ccsrc/minddata/dataset/include/constants.h
@@ -71,6 +71,16 @@ enum class NormalizeForm {
   kNfkd,
 };
 
+// Possible values for Mask
+enum class RelationalOp {
+  kEqual = 0,     // ==
+  kNotEqual,      // !=
+  kLess,          // <
+  kLessEqual,     // <=
+  kGreater,       // >
+  kGreaterEqual,  // >=
+};
+
 // Possible values for SamplingStrategy
 enum class SamplingStrategy { kRandom = 0, kEdgeWeight = 1 };
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h
index 766ad230bff..1114a99980e 100644
--- a/mindspore/ccsrc/minddata/dataset/include/transforms.h
+++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h
@@ -75,6 +75,54 @@ class TensorTransform : public std::enable_shared_from_this<TensorTransform> {
   virtual std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) { return nullptr; }
 };
 
+/// \brief Slice object used in SliceOption.
+class Slice {
+ public:
+  /// \brief Constructor, with start, stop and step default to 0.
+  Slice() : start_(0), stop_(0), step_(0) {}
+  /// \brief Constructor.
+  /// \param[in] start Starting integer specifying where to start the slicing.
+  /// \param[in] stop Ending integer specifying where to stop the slicing.
+  /// \param[in] step An integer specifying the step of the slicing.
+  Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
+  /// \brief Constructor, with step=1.
+  /// \param[in] start Starting integer specifying where to start the slicing.
+  /// \param[in] stop Ending integer specifying where to stop the slicing.
+  Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
+  /// \brief Constructor, with start=0 and step=1.
+  /// \param[in] stop Ending integer specifying where to stop the slicing.
+  explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
+  Slice(Slice const &slice) = default;
+
+  ~Slice() = default;
+
+  bool valid() const { return step_ != 0; }
+  dsize_t start_;
+  dsize_t stop_;
+  dsize_t step_;
+};
+
+/// \brief SliceOption used in Slice Op.
+class SliceOption {
+ public:
+  /// \param[in] all Slice the whole dimension.
+  explicit SliceOption(bool all) : all_(all) {}
+  /// \param[in] indices Slice these indices along the dimension. Negative indices are supported.
+  explicit SliceOption(std::vector<dsize_t> indices) : indices_(indices) {}
+  /// \param[in] slice Slice the generated indices from the slice object along the dimension.
+  explicit SliceOption(Slice slice) : slice_(slice) {}
+  SliceOption(SliceOption const &slice) = default;
+
+  ~SliceOption() = default;
+
+  // only one of the following will be valid
+  // given indices to slice the Tensor.
+  std::vector<dsize_t> indices_ = {};
+  // Slice object. All start, stop and step are 0 if invalid.
+  Slice slice_;
+  bool all_ = false;
+};
+
 // Transform operations for performing data transformation.
 namespace transforms {
 
@@ -105,6 +153,29 @@ class Compose final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
+/// \brief Concatenate Op.
+/// \notes Tensor operation that concatenates all columns into a single tensor.
+class Concatenate final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] axis Concatenate the tensors along given axis (Default=0).
+  /// \param[in] prepend MSTensor to be prepended to the already concatenated tensors (Default={}).
+  /// \param[in] append MSTensor to be appended to the already concatenated tensors (Default={}).
+  explicit Concatenate(int8_t axis = 0, MSTensor prepend = {}, MSTensor append = {});
+
+  /// \brief Destructor
+  ~Concatenate() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
 /// \brief Duplicate Op.
 /// \notes Duplicate the input tensor to a new output tensor.
 ///     The input tensor is carried over to the output list.
@@ -122,6 +193,32 @@ class Duplicate final : public TensorTransform {
   std::shared_ptr<TensorOperation> Parse() override;
 };
 
+/// \brief Mask Op.
+/// \notes Mask content of the input tensor with the given predicate.
+///     Any element of the tensor that matches the predicate will be evaluated to True, otherwise False.
+class Mask final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] op One of the relational operators: EQ, NE, LT, GT, LE or GE.
+  /// \param[in] constant Constant to be compared to.
+  ///     Can only be MSTensor of str, int, float, bool.
+  /// \param[in] ms_type Type of the generated mask (Default=mindspore::DataType::kNumberTypeBool).
+  explicit Mask(RelationalOp op, MSTensor constant,
+                mindspore::DataType ms_type = mindspore::DataType(mindspore::DataType::kNumberTypeBool));
+
+  /// \brief Destructor
+  ~Mask() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
 /// \brief OneHot Op.
 /// \notes Convert the labels into OneHot format.
 class OneHot final : public TensorTransform {
@@ -143,6 +240,30 @@ class OneHot final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
+/// \brief PadEnd Op.
+/// \notes Pad input tensor according to pad_shape; input and pad_shape need to have the same rank.
+class PadEnd final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] pad_shape List of integers representing the shape needed.
+  ///     Dimensions that are set to `None` will not be padded (i.e., the original dim will be used).
+  ///     Shorter dimensions will truncate the values.
+  /// \param[in] pad_value Value used to pad (Default={}).
+  explicit PadEnd(const std::vector<dsize_t> &pad_shape, MSTensor pad_value = {});
+
+  /// \brief Destructor
+  ~PadEnd() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
 /// \brief RandomApply Op.
 /// \notes Randomly perform a series of transforms with a given probability.
 class RandomApply final : public TensorTransform {
@@ -200,6 +321,29 @@ class RandomChoice final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
+/// \brief Slice Op.
+/// \notes Slice operation to extract a tensor out using the given n slices.
+///     The functionality of Slice is similar to NumPy's indexing feature.
+///     (Currently only rank-1 tensors are supported).
+class Slice final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] slice_input Vector of SliceOption.
+  explicit Slice(const std::vector<SliceOption> &slice_input);
+
+  /// \brief Destructor
+  ~Slice() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
 /// \brief TypeCast Op.
 /// \notes Tensor operation to cast to a given MindSpore data type.
 class TypeCast final : public TensorTransform {
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h
index d1c1578a0ab..88b08112198 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h
@@ -124,15 +124,6 @@ Status PadEndStringHelper(const std::shared_ptr<Tensor> &src, std::vector<dsize_t> cur_ind,
                           size_t cur_dim, const std::string &pad_value);
 
-enum class RelationalOp {
-  kEqual = 0,     // ==
-  kNotEqual,      // !=
-  kLess,          // <
-  kLessEqual,     // <=
-  kGreater,       // >
-  kGreaterEqual,  // >=
-};
-
 /// Helper method that masks the input tensor
 /// @tparam T type of the tensor
 /// @param input[in] input tensor
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc
index 5152b44bcfd..bbe09ff94e8 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc
@@ -20,10 +20,22 @@
 
 // Kernel data headers (in alphabetical order)
 #include "minddata/dataset/kernels/data/compose_op.h"
+#ifndef ENABLE_ANDROID
+#include "minddata/dataset/kernels/data/concatenate_op.h"
+#endif
 #include "minddata/dataset/kernels/data/duplicate_op.h"
+#ifndef ENABLE_ANDROID
+#include "minddata/dataset/kernels/data/mask_op.h"
+#endif
 #include "minddata/dataset/kernels/data/one_hot_op.h"
+#ifndef ENABLE_ANDROID
+#include "minddata/dataset/kernels/data/pad_end_op.h"
+#endif
 #include "minddata/dataset/kernels/data/random_apply_op.h"
 #include "minddata/dataset/kernels/data/random_choice_op.h"
+#ifndef ENABLE_ANDROID
+#include "minddata/dataset/kernels/data/slice_op.h"
+#endif
 #include "minddata/dataset/kernels/data/type_cast_op.h"
 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/kernels/data/unique_op.h"
@@ -58,11 +70,55 @@ std::shared_ptr<TensorOp> ComposeOperation::Build() {
   return std::make_shared<ComposeOp>(tensor_ops);
 }
 
+#ifndef ENABLE_ANDROID
+// ConcatenateOperation
+ConcatenateOperation::ConcatenateOperation(int8_t axis, const std::shared_ptr<Tensor> &prepend,
+                                           const std::shared_ptr<Tensor> &append)
+    : axis_(axis), prepend_(prepend), append_(append) {}
+
+Status ConcatenateOperation::ValidateParams() {
+  if (axis_ != 0 && axis_ != -1) {
+    std::string err_msg = "Concatenate: Only 1D concatenation supported.";
+    MS_LOG(ERROR) << err_msg;
+    RETURN_STATUS_SYNTAX_ERROR(err_msg);
+  }
+  if (prepend_) {
+    if (prepend_->shape().Size() != 1) {
+      std::string err_msg = "Concatenate: Can only prepend 1D arrays.";
+      MS_LOG(ERROR) << err_msg;
+      RETURN_STATUS_SYNTAX_ERROR(err_msg);
+    }
+  }
+  if (append_) {
+    if (append_->shape().Size() != 1) {
+      std::string err_msg = "Concatenate: Can only append 1D arrays.";
+      MS_LOG(ERROR) << err_msg;
+      RETURN_STATUS_SYNTAX_ERROR(err_msg);
+    }
+  }
+  return Status::OK();
+}
+
+std::shared_ptr<TensorOp> ConcatenateOperation::Build() {
+  return std::make_shared<ConcatenateOp>(axis_, prepend_, append_);
+}
+#endif
+
 // DuplicateOperation
 Status DuplicateOperation::ValidateParams() { return Status::OK(); }
 
 std::shared_ptr<TensorOp> DuplicateOperation::Build() { return std::make_shared<DuplicateOp>(); }
 
+#ifndef ENABLE_ANDROID
+// MaskOperation
+MaskOperation::MaskOperation(RelationalOp op, const std::shared_ptr<Tensor> &constant, DataType dtype)
+    : op_(op), constant_(constant), dtype_(dtype) {}
+
+Status MaskOperation::ValidateParams() { return Status::OK(); }
+
+std::shared_ptr<TensorOp> MaskOperation::Build() { return std::make_shared<MaskOp>(op_, constant_, dtype_); }
+#endif
+
 // OneHotOperation
 OneHotOperation::OneHotOperation(int32_t num_classes) : num_classes_(num_classes) {}
 
@@ -85,6 +141,16 @@ Status OneHotOperation::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 
+#ifndef ENABLE_ANDROID
+// PadEndOperation
+PadEndOperation::PadEndOperation(const TensorShape &pad_shape, const std::shared_ptr<Tensor> &pad_value)
+    : pad_shape_(pad_shape), pad_value_(pad_value) {}
+
+Status PadEndOperation::ValidateParams() { return Status::OK(); }
+
+std::shared_ptr<TensorOp> PadEndOperation::Build() { return std::make_shared<PadEndOp>(pad_shape_, pad_value_); }
+#endif
+
 // PreBuiltOperation
 PreBuiltOperation::PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op) : op_(tensor_op) {
 #ifdef ENABLE_PYTHON
@@ -137,6 +203,15 @@ std::shared_ptr<TensorOp> RandomChoiceOperation::Build() {
   return std::make_shared<RandomChoiceOp>(tensor_ops);
 }
 
+#ifndef ENABLE_ANDROID
+// SliceOperation
+SliceOperation::SliceOperation(const std::vector<SliceOption> &slice_input) : slice_input_(slice_input) {}
+
+Status SliceOperation::ValidateParams() { return Status::OK(); }
+
+std::shared_ptr<TensorOp> SliceOperation::Build() { return std::make_shared<SliceOp>(slice_input_); }
+#endif
+
 // TypeCastOperation
 TypeCastOperation::TypeCastOperation(std::string data_type) : data_type_(data_type) {}
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h
index ed2013438d1..d52d07e7b51 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h
@@ -28,9 +28,13 @@ namespace mindspore {
 namespace dataset {
 // Char arrays storing name of corresponding classes (in alphabetical order)
 constexpr char kComposeOperation[] = "Compose";
+constexpr char kConcatenateOperation[] = "Concatenate";
 constexpr char kDuplicateOperation[] = "Duplicate";
+constexpr char kMaskOperation[] = "Mask";
 constexpr char kOneHotOperation[] = "OneHot";
+constexpr char kPadEndOperation[] = "PadEnd";
 constexpr char kPreBuiltOperation[] = "PreBuilt";
 constexpr char kRandomApplyOperation[] = "RandomApply";
 constexpr char kRandomChoiceOperation[] = "RandomChoice";
+constexpr char kSliceOperation[] = "Slice";
 constexpr char kTypeCastOperation[] = "TypeCast";
@@ -56,6 +60,25 @@ class ComposeOperation : public TensorOperation {
   std::vector<std::shared_ptr<TensorOperation>> transforms_;
 };
 
+class ConcatenateOperation : public TensorOperation {
+ public:
+  explicit ConcatenateOperation(int8_t axis, const std::shared_ptr<Tensor> &prepend,
+                                const std::shared_ptr<Tensor> &append);
+
+  ~ConcatenateOperation() = default;
+
+  std::shared_ptr<TensorOp> Build() override;
+
+  Status ValidateParams() override;
+
+  std::string Name() const override { return kConcatenateOperation; }
+
+ private:
+  int8_t axis_;
+  std::shared_ptr<Tensor> prepend_;
+  std::shared_ptr<Tensor> append_;
+};
+
 class DuplicateOperation : public TensorOperation {
  public:
   DuplicateOperation() = default;
@@ -69,6 +92,24 @@ class DuplicateOperation : public TensorOperation {
   std::string Name() const override { return kDuplicateOperation; }
 };
 
+class MaskOperation : public TensorOperation {
+ public:
+  explicit MaskOperation(RelationalOp op, const std::shared_ptr<Tensor> &constant, DataType dtype);
+
+  ~MaskOperation() = default;
+
+  std::shared_ptr<TensorOp> Build() override;
+
+  Status ValidateParams() override;
+
+  std::string Name() const override { return kMaskOperation; }
+
+ private:
+  RelationalOp op_;
+  std::shared_ptr<Tensor> constant_;
+  DataType dtype_;
+};
+
 class OneHotOperation : public TensorOperation {
  public:
   explicit OneHotOperation(int32_t num_classes);
@@ -87,6 +128,23 @@ class OneHotOperation : public TensorOperation {
   int32_t num_classes_;
 };
 
+class PadEndOperation : public TensorOperation {
+ public:
+  explicit PadEndOperation(const TensorShape &pad_shape, const std::shared_ptr<Tensor> &pad_value);
+
+  ~PadEndOperation() = default;
+
+  std::shared_ptr<TensorOp> Build() override;
+
+  Status ValidateParams() override;
+
+  std::string Name() const override { return kPadEndOperation; }
+
+ private:
+  TensorShape pad_shape_;
+  std::shared_ptr<Tensor> pad_value_;
+};
+
 class PreBuiltOperation : public TensorOperation {
  public:
   explicit PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op);
@@ -137,6 +195,23 @@ class RandomChoiceOperation : public TensorOperation {
  private:
   std::vector<std::shared_ptr<TensorOperation>> transforms_;
 };
+
+class SliceOperation : public TensorOperation {
+ public:
+  explicit SliceOperation(const std::vector<SliceOption> &slice_input);
+
+  ~SliceOperation() = default;
+
+  std::shared_ptr<TensorOp> Build() override;
+
+  Status ValidateParams() override;
+
+  std::string Name() const override { return kSliceOperation; }
+
+ private:
+  std::vector<SliceOption> slice_input_;
+};
+
 class TypeCastOperation : public TensorOperation {
  public:
   explicit TypeCastOperation(std::string data_type);
diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py
index f775a16ca2d..8de8ef3b296 100644
--- a/mindspore/dataset/transforms/c_transforms.py
+++ b/mindspore/dataset/transforms/c_transforms.py
@@ -160,7 +160,7 @@ class _SliceOption(cde.SliceOption):
         super().__init__(slice_option)
 
 
-class Slice(cde.SliceOp):
+class Slice():
     """
     Slice operation to extract a tensor out using the given n slices.
 
@@ -200,7 +200,10 @@ class Slice(cde.SliceOp):
     def __init__(self, *slices):
         slice_input_ = list(slices)
         slice_input_ = [_SliceOption(slice_dim) for slice_dim in slice_input_]
-        super().__init__(slice_input_)
+        self.slice_input_ = slice_input_
+
+    def parse(self):
+        return cde.SliceOperation(self.slice_input_)
 
 
 class Relational(IntEnum):
@@ -220,7 +223,7 @@ DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ,
                    Relational.LE: cde.RelationalOp.LE}
 
 
-class Mask(cde.MaskOp):
+class Mask():
     """
     Mask content of the input tensor with the given predicate.
     Any element of the tensor that matches the predicate will be evaluated to True, otherwise False.
@@ -250,12 +253,15 @@ class Mask(cde.MaskOp):
 
     @check_mask_op
     def __init__(self, operator, constant, dtype=mstype.bool_):
-        dtype = mstype_to_detype(dtype)
-        constant = cde.Tensor(np.array(constant))
-        super().__init__(DE_C_RELATIONAL[operator], constant, dtype)
+        self.operator = operator
+        self.dtype = mstype_to_detype(dtype)
+        self.constant = cde.Tensor(np.array(constant))
+
+    def parse(self):
+        return cde.MaskOperation(DE_C_RELATIONAL[self.operator], self.constant, self.dtype)
 
 
-class PadEnd(cde.PadEndOp):
+class PadEnd():
     """
     Pad input tensor according to pad_shape, need to have same rank.
@@ -284,12 +290,14 @@ class PadEnd(cde.PadEndOp):
 
     @check_pad_end
     def __init__(self, pad_shape, pad_value=None):
-        if pad_value is not None:
-            pad_value = cde.Tensor(np.array(pad_value))
-        super().__init__(cde.TensorShape(pad_shape), pad_value)
+        self.pad_shape = cde.TensorShape(pad_shape)
+        self.pad_value = cde.Tensor(np.array(pad_value)) if pad_value is not None else pad_value
+
+    def parse(self):
+        return cde.PadEndOperation(self.pad_shape, self.pad_value)
 
 
-class Concatenate(cde.ConcatenateOp):
+class Concatenate():
     """
     Tensor operation that concatenates all columns into a single tensor.
 
@@ -311,11 +319,12 @@ class Concatenate(cde.ConcatenateOp):
 
     @check_concat_type
     def __init__(self, axis=0, prepend=None, append=None):
-        if prepend is not None:
-            prepend = cde.Tensor(np.array(prepend))
-        if append is not None:
-            append = cde.Tensor(np.array(append))
-        super().__init__(axis, prepend, append)
+        self.axis = axis
+        self.prepend = cde.Tensor(np.array(prepend)) if prepend is not None else prepend
+        self.append = cde.Tensor(np.array(append)) if append is not None else append
+
+    def parse(self):
+        return cde.ConcatenateOperation(self.axis, self.prepend, self.append)
 
 
 class Duplicate(TensorOperation):
diff --git a/tests/ut/cpp/dataset/c_api_transforms_test.cc b/tests/ut/cpp/dataset/c_api_transforms_test.cc
index d9d3fa0ecc8..5c45f684905 100644
--- a/tests/ut/cpp/dataset/c_api_transforms_test.cc
+++ b/tests/ut/cpp/dataset/c_api_transforms_test.cc
@@ -137,6 +137,70 @@ TEST_F(MindDataTestPipeline, TestComposeFail3) {
   EXPECT_EQ(iter, nullptr);
 }
 
+TEST_F(MindDataTestPipeline, TestConcatenateSuccess) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateSuccess.";
+
+  // Create a RandomDataset
+  uint32_t curr_seed = GlobalContext::config_manager()->seed();
+  GlobalContext::config_manager()->set_seed(246);
+  std::shared_ptr<SchemaObj> schema = Schema();
+  schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1});
+  std::shared_ptr<Dataset> ds = RandomData(4, schema);
+  EXPECT_NE(ds, nullptr);
+  ds = ds->SetNumWorkers(2);
+  EXPECT_NE(ds, nullptr);
+
+  // Create Concatenate op
+  std::vector<int16_t> prepend_vector = {1, 2};
+  std::shared_ptr<Tensor> prepend_tensor;
+  ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor));
+  mindspore::MSTensor prepend_MSTensor =
+    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(prepend_tensor));
+
+  std::vector<int16_t> append_vector = {3};
+  std::shared_ptr<Tensor> append_tensor;
+  ASSERT_OK(Tensor::CreateFromVector(append_vector, &append_tensor));
+  mindspore::MSTensor append_MSTensor =
+    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(append_tensor));
+
+  transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor, append_MSTensor);
+
+  // Create a Map operation on ds
+  ds = ds->Map({concatenate}, {"col1"});
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  iter->GetNextRow(&row);
+
+  std::vector<std::vector<int16_t>> expected = {
+    {1, 2, 31354, 3}, {1, 2, -5655, 3}, {1, 2, -17734, 3}, {1, 2, -17220, 3}};
+
+  // Check concatenate results
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    auto ind = row["col1"];
+    std::shared_ptr<Tensor> de_expected_tensor;
+    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
+    mindspore::MSTensor expected_tensor =
+      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
+    EXPECT_MSTENSOR_EQ(ind, expected_tensor);
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 4);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+  GlobalContext::config_manager()->set_seed(curr_seed);
+}
+
 TEST_F(MindDataTestPipeline, TestDuplicateSuccess) {
   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDuplicateSuccess.";
 
@@ -177,6 +241,59 @@ TEST_F(MindDataTestPipeline, TestDuplicateSuccess) {
   iter->Stop();
 }
 
+TEST_F(MindDataTestPipeline, TestMaskSuccess) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskSuccess.";
+
+  // Create a RandomDataset
+  uint32_t curr_seed = GlobalContext::config_manager()->seed();
+  GlobalContext::config_manager()->set_seed(246);
+  std::shared_ptr<SchemaObj> schema = Schema();
+  schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {4});
+  std::shared_ptr<Dataset> ds = RandomData(4, schema);
+  EXPECT_NE(ds, nullptr);
+  ds = ds->SetNumWorkers(2);
+  EXPECT_NE(ds, nullptr);
+
+  // Create Mask op
+  std::shared_ptr<Tensor> constant_tensor;
+  ASSERT_OK(Tensor::CreateScalar<int16_t>(0, &constant_tensor));
+  mindspore::MSTensor constant_MSTensor =
+    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(constant_tensor));
+  transforms::Mask mask = transforms::Mask(RelationalOp::kGreater, constant_MSTensor);
+  ds = ds->Map({mask}, {"col1"});
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  iter->GetNextRow(&row);
+
+  std::vector<std::vector<bool>> expected = {{true, true, true, true},
+                                             {false, false, false, false},
+                                             {false, false, false, false},
+                                             {false, false, false, false}};
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    auto ind = row["col1"];
+    std::shared_ptr<Tensor> de_expected_tensor;
+    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
+    mindspore::MSTensor expected_tensor =
+      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
+    EXPECT_MSTENSOR_EQ(ind, expected_tensor);
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 4);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+  GlobalContext::config_manager()->set_seed(curr_seed);
+}
+
 TEST_F(MindDataTestPipeline, TestOneHotSuccess1) {
   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotSuccess1.";
   // Testing CutMixBatch on a batch of CHW images
@@ -330,6 +447,59 @@ TEST_F(MindDataTestPipeline, TestOneHotFail2) {
   EXPECT_EQ(iter, nullptr);
 }
 
+TEST_F(MindDataTestPipeline, TestPadEndSuccess) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndSuccess.";
+
+  // Create a RandomDataset
+  uint32_t curr_seed = GlobalContext::config_manager()->seed();
+  GlobalContext::config_manager()->set_seed(246);
+  std::shared_ptr<SchemaObj> schema = Schema();
+  schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1});
+  std::shared_ptr<Dataset> ds = RandomData(4, schema);
+  EXPECT_NE(ds, nullptr);
+  ds = ds->SetNumWorkers(2);
+  EXPECT_NE(ds, nullptr);
+
+  // Create PadEnd op
+  std::shared_ptr<Tensor> pad_value;
+  ASSERT_OK(Tensor::CreateScalar<int16_t>(0, &pad_value));
+  mindspore::MSTensor pad_value_MSTensor =
+    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(pad_value));
+
+  transforms::PadEnd pad_end = transforms::PadEnd({3}, pad_value_MSTensor);
+  ds = ds->Map({pad_end}, {"col1"});
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  iter->GetNextRow(&row);
+
+  std::vector<std::vector<int16_t>> expected = {{31354, 0, 0}, {-5655, 0, 0}, {-17734, 0, 0}, {-17220, 0, 0}};
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    auto ind = row["col1"];
+    std::shared_ptr<Tensor> de_expected_tensor;
+    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
+    mindspore::MSTensor expected_tensor =
+      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
+    EXPECT_MSTENSOR_EQ(ind, expected_tensor);
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 4);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+  GlobalContext::config_manager()->set_seed(curr_seed);
+}
+
 TEST_F(MindDataTestPipeline, TestRandomApplySuccess) {
   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplySuccess.";
 
@@ -565,6 +735,69 @@ TEST_F(MindDataTestPipeline, TestRandomChoiceFail3) {
   EXPECT_EQ(iter, nullptr);
 }
 
+TEST_F(MindDataTestPipeline, TestSliceSuccess) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceSuccess.";
+
+  // Create a RandomDataset
+  uint32_t curr_seed = GlobalContext::config_manager()->seed();
+  GlobalContext::config_manager()->set_seed(246);
+  std::shared_ptr<SchemaObj> schema = Schema();
+  schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1});
+  std::shared_ptr<Dataset> ds = RandomData(4, schema);
+  EXPECT_NE(ds, nullptr);
+  ds = ds->SetNumWorkers(2);
+  EXPECT_NE(ds, nullptr);
+
+  // Create Concatenate op
+  std::vector<int16_t> prepend_vector = {1, 2, 3};
+  std::shared_ptr<Tensor> prepend_tensor;
+  ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor));
+  mindspore::MSTensor prepend_MSTensor =
+    mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(prepend_tensor));
+
+  transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor);
+
+  // Create a Map operation on ds
+  ds = ds->Map({concatenate}, {"col1"});
+  EXPECT_NE(ds, nullptr);
+
+  // Apply Slice op on ds, get the first and third elements in each row.
+  SliceOption slice_option = SliceOption(Slice(0, 3, 2));
+  transforms::Slice slice = transforms::Slice({slice_option});
+  ds = ds->Map({slice}, {"col1"});
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  iter->GetNextRow(&row);
+
+  std::vector<std::vector<int16_t>> expected = {{1, 3}, {1, 3}, {1, 3}, {1, 3}};
+
+  // Check slice results
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    auto ind = row["col1"];
+    std::shared_ptr<Tensor> de_expected_tensor;
+    ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
+    mindspore::MSTensor expected_tensor =
+      mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
+    EXPECT_MSTENSOR_EQ(ind, expected_tensor);
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 4);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+  GlobalContext::config_manager()->set_seed(curr_seed);
+}
+
 TEST_F(MindDataTestPipeline, TestTypeCastSuccess) {
   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTypeCastSuccess.";
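
For reviewers, a minimal Python usage sketch of the decoupled ops above (not part of the patch; the dataset and values are illustrative, the op signatures are those in the modified c_transforms.py):

    # Illustrative only: exercises the new parse()-based Concatenate/PadEnd/Slice path.
    import numpy as np
    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as c_transforms

    # One row, column "col1" = [1 2 3] (int16 to match the prepend dtype below).
    data = ds.NumpySlicesDataset({"col1": np.array([[1, 2, 3]], dtype=np.int16)}, shuffle=False)
    # Prepend 0 along axis 0: [1 2 3] -> [0 1 2 3]
    data = data.map(operations=c_transforms.Concatenate(axis=0, prepend=np.array([0], dtype=np.int16)),
                    input_columns=["col1"])
    # Pad to length 6 with -1: -> [0 1 2 3 -1 -1]
    data = data.map(operations=c_transforms.PadEnd(pad_shape=[6], pad_value=-1), input_columns=["col1"])
    # Take every second element: -> [0 2 -1]
    data = data.map(operations=c_transforms.Slice(slice(0, 6, 2)), input_columns=["col1"])
    for row in data.create_dict_iterator(output_numpy=True):
        print(row["col1"])

Each op is now a plain Python object whose parse() builds the C++ IR node (e.g. cde.ConcatenateOperation) lazily, instead of inheriting from a pybind TensorOp at construction time.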