!14579 TensorOp decoupling stage 3 (transform ops)

From: @alexyuyue
mindspore-ci-bot 2021-04-08 21:41:26 +08:00 committed by Gitee
commit bdc5a9c88b
11 changed files with 724 additions and 127 deletions


@@ -18,91 +18,22 @@
#include "pybind11/stl_bind.h"
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/core/tensor_helpers.h"
#include "minddata/dataset/kernels/data/concatenate_op.h"
#include "minddata/dataset/kernels/data/fill_op.h"
#include "minddata/dataset/kernels/data/mask_op.h"
#include "minddata/dataset/kernels/data/pad_end_op.h"
#include "minddata/dataset/kernels/data/slice_op.h"
#include "minddata/dataset/kernels/data/to_float16_op.h"
namespace mindspore {
namespace dataset {
PYBIND_REGISTER(ConcatenateOp, 1, ([](const py::module *m) {
(void)py::class_<ConcatenateOp, TensorOp, std::shared_ptr<ConcatenateOp>>(*m, "ConcatenateOp")
.def(py::init<int8_t, std::shared_ptr<Tensor>, std::shared_ptr<Tensor>>());
}));
PYBIND_REGISTER(
FillOp, 1, ([](const py::module *m) {
(void)py::class_<FillOp, TensorOp, std::shared_ptr<FillOp>>(*m, "FillOp").def(py::init<std::shared_ptr<Tensor>>());
}));
PYBIND_REGISTER(MaskOp, 1, ([](const py::module *m) {
(void)py::class_<MaskOp, TensorOp, std::shared_ptr<MaskOp>>(*m, "MaskOp")
.def(py::init<RelationalOp, std::shared_ptr<Tensor>, DataType>());
}));
PYBIND_REGISTER(PadEndOp, 1, ([](const py::module *m) {
(void)py::class_<PadEndOp, TensorOp, std::shared_ptr<PadEndOp>>(*m, "PadEndOp")
.def(py::init<TensorShape, std::shared_ptr<Tensor>>());
}));
PYBIND_REGISTER(SliceOption, 0, ([](const py::module *m) {
(void)py::class_<SliceOption>(*m, "SliceOption")
.def(py::init([](const py::slice &py_slice) {
Slice c_slice;
if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none() &&
!py_slice.attr("step").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
py::reinterpret_borrow<py::int_>(py_slice.attr("stop")),
py::reinterpret_borrow<py::int_>(py_slice.attr("step")));
} else if (py_slice.attr("start").is_none() && py_slice.attr("step").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
} else if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
}
if (!c_slice.valid()) {
THROW_IF_ERROR(
Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Wrong slice object"));
}
return SliceOption(c_slice);
}))
.def(py::init([](const py::list &py_list) {
std::vector<dsize_t> indices;
for (auto l : py_list) {
indices.push_back(py::reinterpret_borrow<py::int_>(l));
}
return SliceOption(indices);
}))
.def(py::init<bool>())
.def(py::init<SliceOption>());
}));
PYBIND_REGISTER(SliceOp, 1, ([](const py::module *m) {
(void)py::class_<SliceOp, TensorOp, std::shared_ptr<SliceOp>>(*m, "SliceOp")
.def(py::init<std::vector<SliceOption>>());
}));
PYBIND_REGISTER(ToFloat16Op, 1, ([](const py::module *m) {
(void)py::class_<ToFloat16Op, TensorOp, std::shared_ptr<ToFloat16Op>>(*m, "ToFloat16Op",
py::dynamic_attr())
.def(py::init<>());
}));
PYBIND_REGISTER(RelationalOp, 0, ([](const py::module *m) {
(void)py::enum_<RelationalOp>(*m, "RelationalOp", py::arithmetic())
.value("EQ", RelationalOp::kEqual)
.value("NE", RelationalOp::kNotEqual)
.value("LT", RelationalOp::kLess)
.value("LE", RelationalOp::kLessEqual)
.value("GT", RelationalOp::kGreater)
.value("GE", RelationalOp::kGreaterEqual)
.export_values();
}));
} // namespace dataset
} // namespace mindspore


@@ -15,6 +15,7 @@
*/
#include "pybind11/pybind11.h"
#include "mindspore/ccsrc/minddata/dataset/include/transforms.h"
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/core/global_context.h"
@@ -64,6 +65,16 @@ PYBIND_REGISTER(
}));
}));
PYBIND_REGISTER(ConcatenateOperation, 1, ([](const py::module *m) {
(void)py::class_<transforms::ConcatenateOperation, TensorOperation,
std::shared_ptr<transforms::ConcatenateOperation>>(*m, "ConcatenateOperation")
.def(py::init([](int8_t axis, std::shared_ptr<Tensor> prepend, std::shared_ptr<Tensor> append) {
auto concatenate = std::make_shared<transforms::ConcatenateOperation>(axis, prepend, append);
THROW_IF_ERROR(concatenate->ValidateParams());
return concatenate;
}));
}));
PYBIND_REGISTER(
DuplicateOperation, 1, ([](const py::module *m) {
(void)py::class_<transforms::DuplicateOperation, TensorOperation, std::shared_ptr<transforms::DuplicateOperation>>(
@@ -75,6 +86,17 @@ PYBIND_REGISTER(
}));
}));
PYBIND_REGISTER(MaskOperation, 1, ([](const py::module *m) {
(void)
py::class_<transforms::MaskOperation, TensorOperation, std::shared_ptr<transforms::MaskOperation>>(
*m, "MaskOperation")
.def(py::init([](RelationalOp op, std::shared_ptr<Tensor> constant, DataType dtype) {
auto mask = std::make_shared<transforms::MaskOperation>(op, constant, dtype);
THROW_IF_ERROR(mask->ValidateParams());
return mask;
}));
}));
PYBIND_REGISTER(
OneHotOperation, 1, ([](const py::module *m) {
(void)py::class_<transforms::OneHotOperation, TensorOperation, std::shared_ptr<transforms::OneHotOperation>>(
@@ -86,6 +108,17 @@ PYBIND_REGISTER(
}));
}));
PYBIND_REGISTER(
PadEndOperation, 1, ([](const py::module *m) {
(void)py::class_<transforms::PadEndOperation, TensorOperation, std::shared_ptr<transforms::PadEndOperation>>(
*m, "PadEndOperation")
.def(py::init([](TensorShape pad_shape, std::shared_ptr<Tensor> pad_value) {
auto pad_end = std::make_shared<transforms::PadEndOperation>(pad_shape, pad_value);
THROW_IF_ERROR(pad_end->ValidateParams());
return pad_end;
}));
}));
PYBIND_REGISTER(RandomChoiceOperation, 1, ([](const py::module *m) {
(void)py::class_<transforms::RandomChoiceOperation, TensorOperation,
std::shared_ptr<transforms::RandomChoiceOperation>>(*m, "RandomChoiceOperation")
@@ -110,6 +143,50 @@ PYBIND_REGISTER(RandomApplyOperation, 1, ([](const py::module *m) {
}));
}));
PYBIND_REGISTER(
SliceOperation, 1, ([](const py::module *m) {
(void)py::class_<transforms::SliceOperation, TensorOperation, std::shared_ptr<transforms::SliceOperation>>(
*m, "SliceOperation")
.def(py::init([](std::vector<SliceOption> slice_input) {
auto slice = std::make_shared<transforms::SliceOperation>(slice_input);
THROW_IF_ERROR(slice->ValidateParams());
return slice;
}));
}));
PYBIND_REGISTER(SliceOption, 0, ([](const py::module *m) {
(void)py::class_<SliceOption>(*m, "SliceOption")
.def(py::init([](const py::slice &py_slice) {
Slice c_slice;
if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none() &&
!py_slice.attr("step").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
py::reinterpret_borrow<py::int_>(py_slice.attr("stop")),
py::reinterpret_borrow<py::int_>(py_slice.attr("step")));
} else if (py_slice.attr("start").is_none() && py_slice.attr("step").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
} else if (!py_slice.attr("start").is_none() && !py_slice.attr("stop").is_none()) {
c_slice = Slice(py::reinterpret_borrow<py::int_>(py_slice.attr("start")),
py::reinterpret_borrow<py::int_>(py_slice.attr("stop")));
}
if (!c_slice.valid()) {
THROW_IF_ERROR(
Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Wrong slice object"));
}
return SliceOption(c_slice);
}))
.def(py::init([](const py::list &py_list) {
std::vector<dsize_t> indices;
for (auto l : py_list) {
indices.push_back(py::reinterpret_borrow<py::int_>(l));
}
return SliceOption(indices);
}))
.def(py::init<bool>())
.def(py::init<SliceOption>());
}));
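// Mapping sketch (illustrative) of how Python slice objects reach the Slice
// constructors above:
//   slice(0, 3, 2) -> Slice(0, 3, 2)   start, stop and step all given
//   slice(1, 4)    -> Slice(1, 4)      step defaults to 1
//   slice(5)       -> Slice(5)         start defaults to 0, step to 1
//   slice(0, 3, 0) -> rejected: step 0 leaves Slice::valid() false, raising
//                     "Wrong slice object"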
PYBIND_REGISTER(
TypeCastOperation, 1, ([](const py::module *m) {
(void)py::class_<transforms::TypeCastOperation, TensorOperation, std::shared_ptr<transforms::TypeCastOperation>>(
@@ -132,5 +209,16 @@ PYBIND_REGISTER(
}));
}));
PYBIND_REGISTER(RelationalOp, 0, ([](const py::module *m) {
(void)py::enum_<RelationalOp>(*m, "RelationalOp", py::arithmetic())
.value("EQ", RelationalOp::kEqual)
.value("NE", RelationalOp::kNotEqual)
.value("LT", RelationalOp::kLess)
.value("LE", RelationalOp::kLessEqual)
.value("GT", RelationalOp::kGreater)
.value("GE", RelationalOp::kGreaterEqual)
.export_values();
}));
} // namespace dataset
} // namespace mindspore


@@ -18,6 +18,7 @@
#include <algorithm>
#include "mindspore/ccsrc/minddata/dataset/core/type_id.h"
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
namespace mindspore {
@@ -56,11 +57,52 @@ Compose::Compose(const std::vector<std::reference_wrapper<TensorTransform>> &tra
std::shared_ptr<TensorOperation> Compose::Parse() { return std::make_shared<ComposeOperation>(data_->transforms_); }
#ifndef ENABLE_ANDROID
// Constructor to Concatenate
struct Concatenate::Data {
explicit Data(int8_t axis, MSTensor prepend, MSTensor append) : axis_(axis), prepend_(prepend), append_(append) {}
int8_t axis_;
MSTensor prepend_;
MSTensor append_;
};
Concatenate::Concatenate(int8_t axis, MSTensor prepend, MSTensor append)
: data_(std::make_shared<Data>(axis, prepend, append)) {}
std::shared_ptr<TensorOperation> Concatenate::Parse() {
std::shared_ptr<Tensor> out_prepend, out_append;
Tensor::CreateFromMSTensor(data_->prepend_, &out_prepend);
Tensor::CreateFromMSTensor(data_->append_, &out_append);
return std::make_shared<ConcatenateOperation>(data_->axis_, out_prepend, out_append);
}
#endif // not ENABLE_ANDROID
// Constructor to Duplicate
Duplicate::Duplicate() {}
std::shared_ptr<TensorOperation> Duplicate::Parse() { return std::make_shared<DuplicateOperation>(); }
#ifndef ENABLE_ANDROID
// Constructor to Mask
struct Mask::Data {
explicit Data(RelationalOp op, MSTensor constant, mindspore::DataType ms_type)
: op_(op), constant_(constant), ms_type_(ms_type) {}
RelationalOp op_;
MSTensor constant_;
mindspore::DataType ms_type_;
};
Mask::Mask(RelationalOp op, MSTensor constant, mindspore::DataType ms_type)
: data_(std::make_shared<Data>(op, constant, ms_type)) {}
std::shared_ptr<TensorOperation> Mask::Parse() {
std::shared_ptr<Tensor> out_constant;
Tensor::CreateFromMSTensor(data_->constant_, &out_constant);
DataType de_type = dataset::MSTypeToDEType(static_cast<TypeId>(data_->ms_type_));
return std::make_shared<MaskOperation>(data_->op_, out_constant, de_type);
}
#endif // not ENABLE_ANDROID
// Constructor to OneHot
struct OneHot::Data {
explicit Data(int32_t num_classes) : num_classes_(num_classes) {}
@@ -71,6 +113,25 @@ OneHot::OneHot(int32_t num_classes) : data_(std::make_shared<Data>(num_classes))
std::shared_ptr<TensorOperation> OneHot::Parse() { return std::make_shared<OneHotOperation>(data_->num_classes_); }
#ifndef ENABLE_ANDROID
// Constructor to PadEnd
struct PadEnd::Data {
explicit Data(const std::vector<dsize_t> &pad_shape, MSTensor pad_value)
: pad_shape_(pad_shape), pad_value_(pad_value) {}
std::vector<dsize_t> pad_shape_;
MSTensor pad_value_;
};
PadEnd::PadEnd(const std::vector<dsize_t> &pad_shape, MSTensor pad_value)
: data_(std::make_shared<Data>(pad_shape, pad_value)) {}
std::shared_ptr<TensorOperation> PadEnd::Parse() {
std::shared_ptr<Tensor> pad_value;
Tensor::CreateFromMSTensor(data_->pad_value_, &pad_value);
return std::make_shared<PadEndOperation>(TensorShape(data_->pad_shape_), pad_value);
}
#endif // not ENABLE_ANDROID
// Constructor to RandomApply.
struct RandomApply::Data {
std::vector<std::shared_ptr<TensorOperation>> transforms_;
@@ -136,6 +197,18 @@ std::shared_ptr<TensorOperation> RandomChoice::Parse() {
return std::make_shared<RandomChoiceOperation>(data_->transforms_);
}
#ifndef ENABLE_ANDROID
// Constructor to Slice
struct Slice::Data {
explicit Data(const std::vector<SliceOption> &slice_input) : slice_input_(slice_input) {}
std::vector<SliceOption> slice_input_;
};
Slice::Slice(const std::vector<SliceOption> &slice_input) : data_(std::make_shared<Data>(slice_input)) {}
std::shared_ptr<TensorOperation> Slice::Parse() { return std::make_shared<SliceOperation>(data_->slice_input_); }
#endif // not ENABLE_ANDROID
// Constructor to TypeCast
struct TypeCast::Data {
explicit Data(const std::vector<char> &data_type) : data_type_(CharToString(data_type)) {}


@@ -19,43 +19,11 @@
#include <memory>
#include <vector>
#include "mindspore/ccsrc/minddata/dataset/include/transforms.h"
#include "minddata/dataset/include/constants.h"
namespace mindspore {
namespace dataset {
class Slice {
public:
Slice() : start_(0), stop_(0), step_(0) {}
Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
Slice(Slice const &slice) = default;
~Slice() = default;
bool valid() const { return step_ != 0; }
dsize_t start_;
dsize_t stop_;
dsize_t step_;
};
class SliceOption {
public:
explicit SliceOption(bool all) : all_(all) {}
explicit SliceOption(std::vector<dsize_t> indices) : indices_(indices) {}
explicit SliceOption(Slice slice) : slice_(slice) {}
SliceOption(SliceOption const &slice) = default;
~SliceOption() = default;
// only one of the following will be valid
// given indices to slice the Tensor.
std::vector<dsize_t> indices_ = {};
// Slice object. All start, stop and step are 0 if invalid.
Slice slice_;
bool all_ = false;
};
/// Recursive helper function to generate indices based on vector of SliceOptions. It recursively iterates through each
/// range represented by slice_options to generate a list of indices to be sliced.
/// \param[out] matrix Generated nested vector of indices


@@ -71,6 +71,16 @@ enum class NormalizeForm {
kNfkd,
};
// Possible values for Mask
enum class RelationalOp {
kEqual = 0, // ==
kNotEqual, // !=
kLess, // <
kLessEqual, // <=
kGreater, // >
kGreaterEqual, // >=
};
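// Illustrative example: masking the input {1, 2, 3} against the constant 2
// with RelationalOp::kLessEqual yields {true, true, false}.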
// Possible values for SamplingStrategy
enum class SamplingStrategy { kRandom = 0, kEdgeWeight = 1 };


@@ -75,6 +75,54 @@ class TensorTransform : public std::enable_shared_from_this<TensorTransform> {
virtual std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) { return nullptr; }
};
/// \brief Slice object used in SliceOption.
class Slice {
public:
/// \brief Constructor, with start, stop and step defaulting to 0.
Slice() : start_(0), stop_(0), step_(0) {}
/// \brief Constructor.
/// \param[in] start Starting integer specifying where to start the slicing.
/// \param[in] stop Ending integer specifying where to stop the slicing.
/// \param[in] step An integer specifying the step of the slicing.
Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
/// \brief Constructor, with step=1.
/// \param[in] start Starting integer specifying where to start the slicing.
/// \param[in] stop Ending integer specifying where to stop the slicing.
Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
/// \brief Constructor, with start=0 and step=1.
/// \param[in] stop Ending integer specifying where to stop the slicing.
explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
Slice(Slice const &slice) = default;
~Slice() = default;
bool valid() const { return step_ != 0; }
dsize_t start_;
dsize_t stop_;
dsize_t step_;
};
/// \brief SliceOption used in Slice Op.
class SliceOption {
public:
/// \param[in] all Slice the whole dimension.
explicit SliceOption(bool all) : all_(all) {}
/// \param[in] indices Slice these indices along the dimension. Negative indices are supported.
explicit SliceOption(std::vector<dsize_t> indices) : indices_(indices) {}
/// \param[in] slice Slice the generated indices from the slice object along the dimension.
explicit SliceOption(Slice slice) : slice_(slice) {}
SliceOption(SliceOption const &slice) = default;
~SliceOption() = default;
// only one of the following will be valid
// given indices to slice the Tensor.
std::vector<dsize_t> indices_ = {};
// Slice object. All start, stop and step are 0 if invalid.
Slice slice_;
bool all_ = false;
};
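// Usage sketch (illustrative): each SliceOption describes one dimension.
//   SliceOption all(true);                           // keep the whole dimension
//   SliceOption picked(std::vector<dsize_t>{0, 2});  // keep indices 0 and 2
//   SliceOption ranged(Slice(0, 3, 2));              // same indices via a range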
// Transform operations for performing data transformation.
namespace transforms {
@@ -105,6 +153,29 @@ class Compose final : public TensorTransform {
std::shared_ptr<Data> data_;
};
/// \brief Concatenate Op.
/// \notes Tensor operation that concatenates all columns into a single tensor.
class Concatenate final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] axis Concatenate the tensors along the given axis (Default=0).
/// \param[in] prepend MSTensor to be prepended to the already concatenated tensors (Default={}).
/// \param[in] append MSTensor to be appended to the already concatenated tensors (Default={}).
explicit Concatenate(int8_t axis = 0, MSTensor prepend = {}, MSTensor append = {});
/// \brief Destructor
~Concatenate() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
struct Data;
std::shared_ptr<Data> data_;
};
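// Usage sketch, mirroring the TestConcatenateSuccess pipeline test below
// (`ds` is any Dataset with a 1D int16 column "col1"; status checks omitted):
//   std::shared_ptr<Tensor> p;
//   Tensor::CreateFromVector(std::vector<int16_t>{1, 2}, &p);
//   mindspore::MSTensor prepend(std::make_shared<mindspore::dataset::DETensor>(p));
//   transforms::Concatenate concatenate(0, prepend);
//   ds = ds->Map({concatenate}, {"col1"});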
/// \brief Duplicate Op.
/// \notes Duplicate the input tensor to a new output tensor.
/// The input tensor is carried over to the output list.
@@ -122,6 +193,32 @@ class Duplicate final : public TensorTransform {
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief Mask Op.
/// \notes Mask content of the input tensor with the given predicate.
/// Any element of the tensor that matches the predicate will evaluate to True, otherwise False.
class Mask final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] op One of the relational operators: EQ, NE, LT, GT, LE or GE.
/// \param[in] constant Constant to be compared to.
/// Can only be an MSTensor of str, int, float, or bool.
/// \param[in] ms_type Type of the generated mask (Default=mindspore::DataType::kNumberTypeBool).
explicit Mask(RelationalOp op, MSTensor constant,
mindspore::DataType ms_type = mindspore::DataType(mindspore::DataType::kNumberTypeBool));
/// \brief Destructor
~Mask() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
struct Data;
std::shared_ptr<Data> data_;
};
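// Usage sketch, mirroring the TestMaskSuccess pipeline test below: produce a
// bool mask of which int16 elements of "col1" are greater than 0:
//   std::shared_ptr<Tensor> c;
//   Tensor::CreateScalar(0, &c);
//   mindspore::MSTensor constant(std::make_shared<mindspore::dataset::DETensor>(c));
//   transforms::Mask mask(RelationalOp::kGreater, constant);
//   ds = ds->Map({mask}, {"col1"});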
/// \brief OneHot Op.
/// \notes Convert the labels into OneHot format.
class OneHot final : public TensorTransform {
@@ -143,6 +240,30 @@ class OneHot final : public TensorTransform {
std::shared_ptr<Data> data_;
};
/// \brief PadEnd Op.
/// \notes Pad the input tensor according to pad_shape; pad_shape must have the same rank as the input.
class PadEnd final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] pad_shape List of integers representing the shape needed.
/// Dimensions that are set to `None` will not be padded (i.e., the original dimension is used).
/// Dimensions shorter than the input will have their values truncated.
/// \param[in] pad_value Value used to pad (Default={}).
explicit PadEnd(const std::vector<dsize_t> &pad_shape, MSTensor pad_value = {});
/// \brief Destructor
~PadEnd() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
struct Data;
std::shared_ptr<Data> data_;
};
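// Usage sketch, mirroring the TestPadEndSuccess pipeline test below: pad each
// 1D row of "col1" out to length 3 with the scalar 0:
//   std::shared_ptr<Tensor> v;
//   Tensor::CreateScalar(0, &v);
//   mindspore::MSTensor pad_value(std::make_shared<mindspore::dataset::DETensor>(v));
//   transforms::PadEnd pad_end({3}, pad_value);
//   ds = ds->Map({pad_end}, {"col1"});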
/// \brief RandomApply Op.
/// \notes Randomly perform a series of transforms with a given probability.
class RandomApply final : public TensorTransform {
@@ -200,6 +321,29 @@ class RandomChoice final : public TensorTransform {
std::shared_ptr<Data> data_;
};
/// \brief Slice Op.
/// \notes Slice operation to extract a tensor out using the given n slices.
/// The functionality of Slice is similar to NumPy's indexing feature.
/// (Currently only rank-1 tensors are supported).
class Slice final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] slice_input Vector of SliceOption objects.
explicit Slice(const std::vector<SliceOption> &slice_input);
/// \brief Destructor
~Slice() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
struct Data;
std::shared_ptr<Data> data_;
};
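// Usage sketch, mirroring the TestSliceSuccess pipeline test below: keep
// elements 0 and 2 of each rank-1 row:
//   transforms::Slice slice({SliceOption(Slice(0, 3, 2))});
//   ds = ds->Map({slice}, {"col1"});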
/// \brief TypeCast Op.
/// \notes Tensor operation to cast to a given MindSpore data type.
class TypeCast final : public TensorTransform {


@@ -124,15 +124,6 @@ Status PadEndStringHelper(const std::shared_ptr<Tensor> &src, std::vector<std::s
const TensorShape &dst_shape, std::vector<dsize_t> cur_ind, size_t cur_dim,
const std::string &pad_value);
enum class RelationalOp {
kEqual = 0, // ==
kNotEqual, // !=
kLess, // <
kLessEqual, // <=
kGreater, // >
kGreaterEqual, // >=
};
/// Helper method that masks the input tensor
/// @tparam T type of the tensor
/// @param input[in] input tensor


@@ -20,10 +20,22 @@
// Kernel data headers (in alphabetical order)
#include "minddata/dataset/kernels/data/compose_op.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/data/concatenate_op.h"
#endif
#include "minddata/dataset/kernels/data/duplicate_op.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/data/mask_op.h"
#endif
#include "minddata/dataset/kernels/data/one_hot_op.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/data/pad_end_op.h"
#endif
#include "minddata/dataset/kernels/data/random_apply_op.h"
#include "minddata/dataset/kernels/data/random_choice_op.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/data/slice_op.h"
#endif
#include "minddata/dataset/kernels/data/type_cast_op.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/data/unique_op.h"
@@ -58,11 +70,55 @@ std::shared_ptr<TensorOp> ComposeOperation::Build() {
return std::make_shared<ComposeOp>(tensor_ops);
}
#ifndef ENABLE_ANDROID
// ConcatenateOperation
ConcatenateOperation::ConcatenateOperation(int8_t axis, const std::shared_ptr<Tensor> &prepend,
const std::shared_ptr<Tensor> &append)
: axis_(axis), prepend_(prepend), append_(append) {}
Status ConcatenateOperation::ValidateParams() {
if (axis_ != 0 && axis_ != -1) {
std::string err_msg = "Concatenate: Only 1D concatenation supported.";
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
if (prepend_) {
if (prepend_->shape().Size() != 1) {
std::string err_msg = "Concatenate: Can only prepend 1D arrays.";
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
}
if (append_) {
if (append_->shape().Size() != 1) {
std::string err_msg = "Concatenate: Can only append 1D arrays.";
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
}
return Status::OK();
}
std::shared_ptr<TensorOp> ConcatenateOperation::Build() {
return std::make_shared<ConcatenateOp>(axis_, prepend_, append_);
}
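// Validation sketch: ConcatenateOperation(0, prepend, append) passes when both
// tensors are rank-1; an axis other than 0 or -1, or a prepend/append tensor of
// any other rank, fails ValidateParams() before Build() is reached.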
#endif
// DuplicateOperation
Status DuplicateOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> DuplicateOperation::Build() { return std::make_shared<DuplicateOp>(); }
#ifndef ENABLE_ANDROID
// MaskOperation
MaskOperation::MaskOperation(RelationalOp op, const std::shared_ptr<Tensor> &constant, DataType dtype)
: op_(op), constant_(constant), dtype_(dtype) {}
Status MaskOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> MaskOperation::Build() { return std::make_shared<MaskOp>(op_, constant_, dtype_); }
#endif
// OneHotOperation
OneHotOperation::OneHotOperation(int32_t num_classes) : num_classes_(num_classes) {}
@@ -85,6 +141,16 @@ Status OneHotOperation::to_json(nlohmann::json *out_json) {
return Status::OK();
}
#ifndef ENABLE_ANDROID
// PadEndOperation
PadEndOperation::PadEndOperation(const TensorShape &pad_shape, const std::shared_ptr<Tensor> &pad_value)
: pad_shape_(pad_shape), pad_value_(pad_value) {}
Status PadEndOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> PadEndOperation::Build() { return std::make_shared<PadEndOp>(pad_shape_, pad_value_); }
#endif
// PreBuiltOperation
PreBuiltOperation::PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op) : op_(tensor_op) {
#ifdef ENABLE_PYTHON
@@ -137,6 +203,15 @@ std::shared_ptr<TensorOp> RandomChoiceOperation::Build() {
return std::make_shared<RandomChoiceOp>(tensor_ops);
}
#ifndef ENABLE_ANDROID
// SliceOperation
SliceOperation::SliceOperation(const std::vector<SliceOption> &slice_input) : slice_input_(slice_input) {}
Status SliceOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> SliceOperation::Build() { return std::make_shared<SliceOp>(slice_input_); }
#endif
// TypeCastOperation
TypeCastOperation::TypeCastOperation(std::string data_type) : data_type_(data_type) {}


@@ -28,9 +28,13 @@ namespace mindspore {
namespace dataset {
// Char arrays storing name of corresponding classes (in alphabetical order)
constexpr char kComposeOperation[] = "Compose";
constexpr char kConcatenateOperation[] = "Concatenate";
constexpr char kDuplicateOperation[] = "Duplicate";
constexpr char kMaskOperation[] = "Mask";
constexpr char kOneHotOperation[] = "OneHot";
constexpr char kPadEndOperation[] = "PadEnd";
constexpr char kPreBuiltOperation[] = "PreBuilt";
constexpr char kSliceOperation[] = "Slice";
constexpr char kRandomApplyOperation[] = "RandomApply";
constexpr char kRandomChoiceOperation[] = "RandomChoice";
constexpr char kTypeCastOperation[] = "TypeCast";
@@ -56,6 +60,25 @@ class ComposeOperation : public TensorOperation {
std::vector<std::shared_ptr<TensorOperation>> transforms_;
};
class ConcatenateOperation : public TensorOperation {
public:
explicit ConcatenateOperation(int8_t axis, const std::shared_ptr<Tensor> &prepend,
const std::shared_ptr<Tensor> &append);
~ConcatenateOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kConcatenateOperation; }
private:
int8_t axis_;
std::shared_ptr<Tensor> prepend_;
std::shared_ptr<Tensor> append_;
};
class DuplicateOperation : public TensorOperation {
public:
DuplicateOperation() = default;
@@ -69,6 +92,24 @@ class DuplicateOperation : public TensorOperation {
std::string Name() const override { return kDuplicateOperation; }
};
class MaskOperation : public TensorOperation {
public:
explicit MaskOperation(RelationalOp op, const std::shared_ptr<Tensor> &constant, DataType dtype);
~MaskOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kMaskOperation; }
private:
RelationalOp op_;
std::shared_ptr<Tensor> constant_;
DataType dtype_;
};
class OneHotOperation : public TensorOperation {
public:
explicit OneHotOperation(int32_t num_classes);
@@ -87,6 +128,23 @@ class OneHotOperation : public TensorOperation {
int32_t num_classes_;
};
class PadEndOperation : public TensorOperation {
public:
explicit PadEndOperation(const TensorShape &pad_shape, const std::shared_ptr<Tensor> &pad_value);
~PadEndOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kPadEndOperation; }
private:
TensorShape pad_shape_;
std::shared_ptr<Tensor> pad_value_;
};
class PreBuiltOperation : public TensorOperation {
public:
explicit PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op);
@@ -137,6 +195,23 @@ class RandomChoiceOperation : public TensorOperation {
private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
};
class SliceOperation : public TensorOperation {
public:
explicit SliceOperation(const std::vector<SliceOption> &slice_input);
~SliceOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kSliceOperation; }
private:
std::vector<SliceOption> slice_input_;
};
class TypeCastOperation : public TensorOperation {
public:
explicit TypeCastOperation(std::string data_type);


@@ -160,7 +160,7 @@ class _SliceOption(cde.SliceOption):
super().__init__(slice_option)
class Slice(cde.SliceOp):
class Slice():
"""
Slice operation to extract a tensor out using the given n slices.
@@ -200,7 +200,10 @@ class Slice(cde.SliceOp):
def __init__(self, *slices):
slice_input_ = list(slices)
slice_input_ = [_SliceOption(slice_dim) for slice_dim in slice_input_]
super().__init__(slice_input_)
self.slice_input_ = slice_input_
def parse(self):
return cde.SliceOperation(self.slice_input_)
class Relational(IntEnum):
@@ -220,7 +223,7 @@ DE_C_RELATIONAL = {Relational.EQ: cde.RelationalOp.EQ,
Relational.LE: cde.RelationalOp.LE}
class Mask(cde.MaskOp):
class Mask():
"""
Mask content of the input tensor with the given predicate.
Any element of the tensor that matches the predicate will evaluate to True, otherwise False.
@@ -250,12 +253,15 @@ class Mask(cde.MaskOp):
@check_mask_op
def __init__(self, operator, constant, dtype=mstype.bool_):
dtype = mstype_to_detype(dtype)
constant = cde.Tensor(np.array(constant))
super().__init__(DE_C_RELATIONAL[operator], constant, dtype)
self.operator = operator
self.dtype = mstype_to_detype(dtype)
self.constant = cde.Tensor(np.array(constant))
def parse(self):
return cde.MaskOperation(DE_C_RELATIONAL[self.operator], self.constant, self.dtype)
class PadEnd(cde.PadEndOp):
class PadEnd():
"""
Pad input tensor according to pad_shape; pad_shape must have the same rank as the input.
@@ -284,12 +290,14 @@ class PadEnd(cde.PadEndOp):
@check_pad_end
def __init__(self, pad_shape, pad_value=None):
if pad_value is not None:
pad_value = cde.Tensor(np.array(pad_value))
super().__init__(cde.TensorShape(pad_shape), pad_value)
self.pad_shape = cde.TensorShape(pad_shape)
self.pad_value = cde.Tensor(np.array(pad_value)) if pad_value is not None else pad_value
def parse(self):
return cde.PadEndOperation(self.pad_shape, self.pad_value)
class Concatenate(cde.ConcatenateOp):
class Concatenate():
"""
Tensor operation that concatenates all columns into a single tensor.
@@ -311,11 +319,12 @@ class Concatenate(cde.ConcatenateOp):
@check_concat_type
def __init__(self, axis=0, prepend=None, append=None):
if prepend is not None:
prepend = cde.Tensor(np.array(prepend))
if append is not None:
append = cde.Tensor(np.array(append))
super().__init__(axis, prepend, append)
self.axis = axis
self.prepend = cde.Tensor(np.array(prepend)) if prepend is not None else prepend
self.append = cde.Tensor(np.array(append)) if append is not None else append
def parse(self):
return cde.ConcatenateOperation(self.axis, self.prepend, self.append)
class Duplicate(TensorOperation):


@@ -137,6 +137,70 @@ TEST_F(MindDataTestPipeline, TestComposeFail3) {
EXPECT_EQ(iter, nullptr);
}
TEST_F(MindDataTestPipeline, TestConcatenateSuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatenateSuccess.";
// Create a RandomDataset
uint32_t curr_seed = GlobalContext::config_manager()->seed();
GlobalContext::config_manager()->set_seed(246);
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1});
std::shared_ptr<Dataset> ds = RandomData(4, schema);
EXPECT_NE(ds, nullptr);
ds = ds->SetNumWorkers(2);
EXPECT_NE(ds, nullptr);
// Create Concatenate op
std::vector<std::int16_t> prepend_vector = {1, 2};
std::shared_ptr<Tensor> prepend_tensor;
ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor));
mindspore::MSTensor prepend_MSTensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(prepend_tensor));
std::vector<std::int16_t> append_vector = {3};
std::shared_ptr<Tensor> append_tensor;
ASSERT_OK(Tensor::CreateFromVector(append_vector, &append_tensor));
mindspore::MSTensor append_MSTensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(append_tensor));
transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor, append_MSTensor);
// Create a Map operation on ds
ds = ds->Map({concatenate}, {"col1"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
iter->GetNextRow(&row);
std::vector<std::vector<std::int16_t>> expected = {
{1, 2, 31354, 3}, {1, 2, -5655, 3}, {1, 2, -17734, 3}, {1, 2, -17220, 3}};
// Check concatenate results
uint64_t i = 0;
while (row.size() != 0) {
auto ind = row["col1"];
std::shared_ptr<Tensor> de_expected_tensor;
ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
mindspore::MSTensor expected_tensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
EXPECT_MSTENSOR_EQ(ind, expected_tensor);
iter->GetNextRow(&row);
i++;
}
EXPECT_EQ(i, 4);
// Manually terminate the pipeline
iter->Stop();
GlobalContext::config_manager()->set_seed(curr_seed);
}
TEST_F(MindDataTestPipeline, TestDuplicateSuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDuplicateSuccess.";
@@ -177,6 +241,59 @@ TEST_F(MindDataTestPipeline, TestDuplicateSuccess) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestMaskSuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMaskSuccess.";
// Create a RandomDataset
uint32_t curr_seed = GlobalContext::config_manager()->seed();
GlobalContext::config_manager()->set_seed(246);
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {4});
std::shared_ptr<Dataset> ds = RandomData(4, schema);
EXPECT_NE(ds, nullptr);
ds = ds->SetNumWorkers(2);
EXPECT_NE(ds, nullptr);
// Create Mask op
std::shared_ptr<Tensor> constant_tensor;
ASSERT_OK(Tensor::CreateScalar(0, &constant_tensor));
mindspore::MSTensor constant_MSTensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(constant_tensor));
transforms::Mask mask = transforms::Mask(RelationalOp::kGreater, constant_MSTensor);
ds = ds->Map({mask}, {"col1"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
iter->GetNextRow(&row);
std::vector<std::vector<bool>> expected = {
{true, true, true, true}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}};
uint64_t i = 0;
while (row.size() != 0) {
auto ind = row["col1"];
std::shared_ptr<Tensor> de_expected_tensor;
ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
mindspore::MSTensor expected_tensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
EXPECT_MSTENSOR_EQ(ind, expected_tensor);
iter->GetNextRow(&row);
i++;
}
EXPECT_EQ(i, 4);
// Manually terminate the pipeline
iter->Stop();
GlobalContext::config_manager()->set_seed(curr_seed);
}
TEST_F(MindDataTestPipeline, TestOneHotSuccess1) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestOneHotSuccess1.";
// Testing CutMixBatch on a batch of CHW images
@@ -330,6 +447,59 @@ TEST_F(MindDataTestPipeline, TestOneHotFail2) {
EXPECT_EQ(iter, nullptr);
}
TEST_F(MindDataTestPipeline, TestPadEndSuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPadEndSuccess.";
// Create a RandomDataset
uint32_t curr_seed = GlobalContext::config_manager()->seed();
GlobalContext::config_manager()->set_seed(246);
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1});
std::shared_ptr<Dataset> ds = RandomData(4, schema);
EXPECT_NE(ds, nullptr);
ds = ds->SetNumWorkers(2);
EXPECT_NE(ds, nullptr);
// Create PadEnd op
std::shared_ptr<Tensor> pad_value;
ASSERT_OK(Tensor::CreateScalar(0, &pad_value));
mindspore::MSTensor pad_value_MSTensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(pad_value));
transforms::PadEnd pad_end = transforms::PadEnd({3}, pad_value_MSTensor);
ds = ds->Map({pad_end}, {"col1"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
iter->GetNextRow(&row);
std::vector<std::vector<std::int16_t>> expected = {{31354, 0, 0}, {-5655, 0, 0}, {-17734, 0, 0}, {-17220, 0, 0}};
uint64_t i = 0;
while (row.size() != 0) {
auto ind = row["col1"];
std::shared_ptr<Tensor> de_expected_tensor;
ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
mindspore::MSTensor expected_tensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
EXPECT_MSTENSOR_EQ(ind, expected_tensor);
iter->GetNextRow(&row);
i++;
}
EXPECT_EQ(i, 4);
// Manually terminate the pipeline
iter->Stop();
GlobalContext::config_manager()->set_seed(curr_seed);
}
TEST_F(MindDataTestPipeline, TestRandomApplySuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomApplySuccess.";
@@ -565,6 +735,69 @@ TEST_F(MindDataTestPipeline, TestRandomChoiceFail3) {
EXPECT_EQ(iter, nullptr);
}
TEST_F(MindDataTestPipeline, TestSliceSuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSliceSuccess.";
// Create a RandomDataset
uint32_t curr_seed = GlobalContext::config_manager()->seed();
GlobalContext::config_manager()->set_seed(246);
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {1});
std::shared_ptr<Dataset> ds = RandomData(4, schema);
EXPECT_NE(ds, nullptr);
ds = ds->SetNumWorkers(2);
EXPECT_NE(ds, nullptr);
// Create concatenate op
std::vector<std::int16_t> prepend_vector = {1, 2, 3};
std::shared_ptr<Tensor> prepend_tensor;
ASSERT_OK(Tensor::CreateFromVector(prepend_vector, &prepend_tensor));
mindspore::MSTensor prepend_MSTensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(prepend_tensor));
transforms::Concatenate concatenate = transforms::Concatenate(0, prepend_MSTensor);
// Create a Map operation on ds
ds = ds->Map({concatenate}, {"col1"});
EXPECT_NE(ds, nullptr);
// Apply Slice op on ds, get the first and third elements in each row.
SliceOption slice_option = SliceOption(Slice(0, 3, 2));
transforms::Slice slice = transforms::Slice({slice_option});
ds = ds->Map({slice}, {"col1"});
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, mindspore::MSTensor> row;
iter->GetNextRow(&row);
std::vector<std::vector<std::int16_t>> expected = {{1, 3}, {1, 3}, {1, 3}, {1, 3}};
// Check slice results
uint64_t i = 0;
while (row.size() != 0) {
auto ind = row["col1"];
std::shared_ptr<Tensor> de_expected_tensor;
ASSERT_OK(Tensor::CreateFromVector(expected[i], &de_expected_tensor));
mindspore::MSTensor expected_tensor =
mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_tensor));
EXPECT_MSTENSOR_EQ(ind, expected_tensor);
iter->GetNextRow(&row);
i++;
}
EXPECT_EQ(i, 4);
// Manually terminate the pipeline
iter->Stop();
GlobalContext::config_manager()->set_seed(curr_seed);
}
TEST_F(MindDataTestPipeline, TestTypeCastSuccess) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTypeCastSuccess.";