diff --git a/mindspore/ccsrc/minddata/dataset/api/audio.cc b/mindspore/ccsrc/minddata/dataset/api/audio.cc index aa0f33d0fdc..42a94f712f0 100644 --- a/mindspore/ccsrc/minddata/dataset/api/audio.cc +++ b/mindspore/ccsrc/minddata/dataset/api/audio.cc @@ -149,7 +149,7 @@ std::shared_ptr ComplexNorm::Parse() { return std::make_shared< // FrequencyMasking Transform Operation. struct FrequencyMasking::Data { - Data(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, double mask_value) + Data(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value) : iid_masks_(iid_masks), frequency_mask_param_(frequency_mask_param), mask_start_(mask_start), @@ -157,10 +157,10 @@ struct FrequencyMasking::Data { int32_t frequency_mask_param_; int32_t mask_start_; bool iid_masks_; - double mask_value_; + float mask_value_; }; -FrequencyMasking::FrequencyMasking(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, double mask_value) +FrequencyMasking::FrequencyMasking(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value) : data_(std::make_shared(iid_masks, frequency_mask_param, mask_start, mask_value)) {} std::shared_ptr FrequencyMasking::Parse() { @@ -170,15 +170,15 @@ std::shared_ptr FrequencyMasking::Parse() { // TimeMasking Transform Operation. 
struct TimeMasking::Data { - Data(bool iid_masks, int64_t time_mask_param, int64_t mask_start, double mask_value) + Data(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value) : iid_masks_(iid_masks), time_mask_param_(time_mask_param), mask_start_(mask_start), mask_value_(mask_value) {} - int64_t time_mask_param_; - int64_t mask_start_; + int32_t time_mask_param_; + int32_t mask_start_; bool iid_masks_; - double mask_value_; + float mask_value_; }; -TimeMasking::TimeMasking(bool iid_masks, int64_t time_mask_param, int64_t mask_start, double mask_value) +TimeMasking::TimeMasking(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value) : data_(std::make_shared(iid_masks, time_mask_param, mask_start, mask_value)) {} std::shared_ptr TimeMasking::Parse() { diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc index 4564426ab74..af6b179ade3 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc @@ -134,7 +134,7 @@ PYBIND_REGISTER( (void) py::class_>( *m, "FrequencyMaskingOperation") - .def(py::init([](bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, double mask_value) { + .def(py::init([](bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value) { auto frequency_masking = std::make_shared(iid_masks, frequency_mask_param, mask_start, mask_value); THROW_IF_ERROR(frequency_masking->ValidateParams()); @@ -146,7 +146,7 @@ PYBIND_REGISTER( TimeMaskingOperation, 1, ([](const py::module *m) { (void)py::class_>( *m, "TimeMaskingOperation") - .def(py::init([](bool iid_masks, int64_t time_mask_param, int64_t mask_start, double mask_value) { + .def(py::init([](bool iid_masks, int32_t time_mask_param, int32_t 
mask_start, float mask_value) { auto time_masking = std::make_shared(iid_masks, time_mask_param, mask_start, mask_value); THROW_IF_ERROR(time_masking->ValidateParams()); diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/frequency_masking_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/frequency_masking_ir.cc index 2ee3a8a8123..a1d0b57179d 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/frequency_masking_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/frequency_masking_ir.cc @@ -24,7 +24,7 @@ namespace dataset { namespace audio { FrequencyMaskingOperation::FrequencyMaskingOperation(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, - double mask_value) + float mask_value) : iid_masks_(iid_masks), frequency_mask_param_(frequency_mask_param), mask_start_(mask_start), diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/frequency_masking_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/frequency_masking_ir.h index 85a23ff851f..6f710a513b5 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/frequency_masking_ir.h +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/frequency_masking_ir.h @@ -31,7 +31,7 @@ constexpr char kFrequencyMaskingOperation[] = "FrequencyMasking"; class FrequencyMaskingOperation : public TensorOperation { public: - FrequencyMaskingOperation(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, double mask_value); + FrequencyMaskingOperation(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, float mask_value); ~FrequencyMaskingOperation(); @@ -47,7 +47,7 @@ class FrequencyMaskingOperation : public TensorOperation { int32_t frequency_mask_param_; int32_t mask_start_; bool iid_masks_; - double mask_value_; + float mask_value_; }; // class FrequencyMaskingOperation } // namespace audio diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_masking_ir.cc 
b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_masking_ir.cc index 0bdffac2590..bfc092db0c7 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_masking_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_masking_ir.cc @@ -23,8 +23,8 @@ namespace mindspore { namespace dataset { namespace audio { -TimeMaskingOperation::TimeMaskingOperation(bool iid_masks, int64_t time_mask_param, int64_t mask_start, - double mask_value) +TimeMaskingOperation::TimeMaskingOperation(bool iid_masks, int32_t time_mask_param, int32_t mask_start, + float mask_value) : iid_masks_(iid_masks), time_mask_param_(time_mask_param), mask_start_(mask_start), mask_value_(mask_value) {} TimeMaskingOperation::~TimeMaskingOperation() = default; diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_masking_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_masking_ir.h index 183ea642ed0..c8a920f3feb 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_masking_ir.h +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_masking_ir.h @@ -31,7 +31,7 @@ constexpr char kTimeMaskingOperation[] = "TimeMasking"; class TimeMaskingOperation : public TensorOperation { public: - TimeMaskingOperation(bool iid_masks, int64_t time_mask_param, int64_t mask_start, double mask_value); + TimeMaskingOperation(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value); ~TimeMaskingOperation(); @@ -44,10 +44,10 @@ class TimeMaskingOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; private: - int64_t time_mask_param_; - int64_t mask_start_; + int32_t time_mask_param_; + int32_t mask_start_; bool iid_masks_; - double mask_value_; + float mask_value_; }; // class TimeMaskingOperation } // namespace audio diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc index d225eabd48b..45247d90f50 100644 --- 
a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc @@ -346,20 +346,20 @@ Status TimeStretch(std::shared_ptr input, std::shared_ptr *outpu return Status::OK(); } -Status RandomMaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int64_t mask_param, - double mask_value, int axis, std::mt19937 rnd) { - std::uniform_int_distribution mask_width_value(0, mask_param); +Status RandomMaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int32_t mask_param, + float mask_value, int axis, std::mt19937 rnd) { + std::uniform_int_distribution mask_width_value(0, mask_param); TensorShape input_shape = input->shape(); - int64_t mask_dim_size = axis == 1 ? input_shape[-2] : input_shape[-1]; - int64_t mask_width = mask_width_value(rnd); - std::uniform_int_distribution min_freq_value(0, mask_dim_size - mask_width); - int64_t mask_start = min_freq_value(rnd); + int32_t mask_dim_size = axis == 1 ? input_shape[-2] : input_shape[-1]; + int32_t mask_width = mask_width_value(rnd); + std::uniform_int_distribution min_freq_value(0, mask_dim_size - mask_width); + int32_t mask_start = min_freq_value(rnd); return MaskAlongAxis(input, output, mask_width, mask_start, mask_value, axis); } -Status MaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int64_t mask_width, - int64_t mask_start, double mask_value, int axis) { +Status MaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int32_t mask_width, + int32_t mask_start, float mask_value, int32_t axis) { if (axis != 2 && axis != 1) { RETURN_STATUS_UNEXPECTED("MaskAlongAxis: only support Time and Frequency masking, axis should be 1 or 2."); } @@ -374,7 +374,7 @@ Status MaskAlongAxis(const std::shared_ptr &input, std::shared_ptrtype().SizeInBytes(); + int32_t cell_size = input->type().SizeInBytes(); if (axis == 1) { // freq diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h 
b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h index 932e7e03dc2..2b8f6e21e5f 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h @@ -231,8 +231,8 @@ Status TimeStretch(std::shared_ptr input, std::shared_ptr *outpu /// \param axis: Axis to apply masking on (1 -> frequency, 2 -> time). /// \param rnd: Number generator. /// \return Status code. -Status RandomMaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int64_t mask_param, - double mask_value, int axis, std::mt19937 rnd); +Status RandomMaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int32_t mask_param, + float mask_value, int axis, std::mt19937 rnd); /// \brief Apply a mask along axis. All examples will have the same mask interval. /// \param input: Tensor of shape <..., freq, time>. @@ -243,8 +243,8 @@ Status RandomMaskAlongAxis(const std::shared_ptr &input, std::shared_ptr /// \param mask_value: Value to assign to the masked columns. /// \param axis: Axis to apply masking on (1 -> frequency, 2 -> time). /// \return Status code. -Status MaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int64_t mask_width, - int64_t mask_start, double mask_value, int axis); +Status MaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int32_t mask_width, + int32_t mask_start, float mask_value, int32_t axis); /// \brief Compute the norm of complex tensor input. /// \param power Power of the norm description (optional). 
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/frequency_masking_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/frequency_masking_op.cc index 56bbfe09dfa..b1b5bbed06a 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/frequency_masking_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/frequency_masking_op.cc @@ -25,7 +25,7 @@ namespace dataset { // constructor FrequencyMaskingOp::FrequencyMaskingOp(bool iid_masks, int32_t frequency_mask_param, int32_t mask_start, - double mask_value) + float mask_value) : frequency_mask_param_(frequency_mask_param), mask_start_(mask_start), iid_masks_(iid_masks), @@ -42,7 +42,7 @@ Status FrequencyMaskingOp::Compute(const std::shared_ptr &input, std::sh TensorShape input_shape = input->shape(); CHECK_FAIL_RETURN_UNEXPECTED( input_shape[-2] >= frequency_mask_param_, - "FrequencyMasking: frequency_mask_param should be less than the length of frequency dimension."); + "FrequencyMasking: frequency_mask_param should be less than or equal to the length of frequency dimension."); std::shared_ptr input_tensor; // typecast @@ -53,13 +53,11 @@ Status FrequencyMaskingOp::Compute(const std::shared_ptr &input, std::sh } else { input_tensor = input; } - auto mask_val = - input->type() != DataType::DE_FLOAT64 ? static_cast(mask_value_) : static_cast(mask_value_); // iid_masks - whether to apply different masks to each example/channel. 
if (iid_masks_ == false) { - return MaskAlongAxis(input_tensor, output, frequency_mask_param_, mask_start_, mask_val, 1); + return MaskAlongAxis(input_tensor, output, frequency_mask_param_, mask_start_, mask_value_, 1); } else { - return RandomMaskAlongAxis(input_tensor, output, frequency_mask_param_, mask_val, 1, rnd_); + return RandomMaskAlongAxis(input_tensor, output, frequency_mask_param_, mask_value_, 1, rnd_); } } } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/frequency_masking_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/frequency_masking_op.h index 2da79d5261e..6bc48b6c51b 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/frequency_masking_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/frequency_masking_op.h @@ -31,7 +31,7 @@ namespace dataset { class FrequencyMaskingOp : public TensorOp { public: explicit FrequencyMaskingOp(bool iid_masks = false, int32_t frequency_mask_param = 0, int32_t mask_start = 0, - double mask_value_ = 0.0); + float mask_value_ = 0.0); ~FrequencyMaskingOp() override = default; @@ -43,7 +43,7 @@ class FrequencyMaskingOp : public TensorOp { bool iid_masks_; int32_t frequency_mask_param_; int32_t mask_start_; - double mask_value_; + float mask_value_; std::mt19937 rnd_; }; } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_masking_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_masking_op.cc index 0bf309955ef..c93c1198e53 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_masking_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_masking_op.cc @@ -24,7 +24,7 @@ namespace mindspore { namespace dataset { // constructor -TimeMaskingOp::TimeMaskingOp(bool iid_masks, int64_t time_mask_param, int64_t mask_start, double mask_value) +TimeMaskingOp::TimeMaskingOp(bool iid_masks, int32_t time_mask_param, int32_t mask_start, float mask_value) : time_mask_param_(time_mask_param), mask_start_(mask_start), 
iid_masks_(iid_masks), mask_value_(mask_value) { rnd_.seed(GetSeed()); } @@ -35,8 +35,9 @@ Status TimeMaskingOp::Compute(const std::shared_ptr &input, std::shared_ // input <..., freq, time> CHECK_FAIL_RETURN_UNEXPECTED(input->Rank() >= 2, "TimeMasking: input tensor is not in shape of <..., freq, time>."); TensorShape input_shape = input->shape(); - CHECK_FAIL_RETURN_UNEXPECTED(input_shape[-1] >= time_mask_param_, - "TimeMasking: time_mask_param should be less than the length of time dimension."); + CHECK_FAIL_RETURN_UNEXPECTED( + input_shape[-1] >= time_mask_param_, + "TimeMasking: time_mask_param should be less than or equal to the length of time dimension."); std::shared_ptr input_tensor; // typecast diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_masking_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_masking_op.h index cf2ef07287e..7099a2cbb27 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_masking_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_masking_op.h @@ -30,8 +30,8 @@ namespace dataset { class TimeMaskingOp : public TensorOp { public: - explicit TimeMaskingOp(bool iid_masks = false, int64_t time_mask_param = 0, int64_t mask_start = 0, - double mask_value_ = 0.0); + explicit TimeMaskingOp(bool iid_masks = false, int32_t time_mask_param = 0, int32_t mask_start = 0, + float mask_value_ = 0.0); ~TimeMaskingOp() override = default; @@ -41,9 +41,9 @@ class TimeMaskingOp : public TensorOp { private: bool iid_masks_; - int64_t time_mask_param_; - int64_t mask_start_; - double mask_value_; + int32_t time_mask_param_; + int32_t mask_start_; + float mask_value_; std::mt19937 rnd_; }; } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h index b2fa960ad20..2bb890e0f28 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h @@ -215,13 
+215,13 @@ class FrequencyMasking final : public TensorTransform { public: /// \brief Constructor. /// \param[in] iid_masks Whether to apply different masks to each example. - /// \param[in] frequency_mask_param Maximum possible length of the mask. + /// \param[in] frequency_mask_param Maximum possible length of the mask, range: [0, freq_length] (Default: 0). /// Indices uniformly sampled from [0, frequency_mask_param]. /// Mask width when iid_masks=true. - /// \param[in] mask_start Mask start when iid_masks=true. + /// \param[in] mask_start Mask start when iid_masks=false, range: [0, freq_length-frequency_mask_param] (Default: 0). /// \param[in] mask_value Mask value. explicit FrequencyMasking(bool iid_masks = false, int32_t frequency_mask_param = 0, int32_t mask_start = 0, - double mask_value = 0.0); + float mask_value = 0.0); /// \brief Destructor. ~FrequencyMasking() = default; @@ -242,13 +242,13 @@ class TimeMasking final : public TensorTransform { public: /// \brief Constructor. /// \param[in] iid_masks Whether to apply different masks to each example. - /// \param[in] time_mask_param Maximum possible length of the mask. + /// \param[in] time_mask_param Maximum possible length of the mask, range: [0, time_length] (Default: 0). /// Indices uniformly sampled from [0, time_mask_param]. /// Mask width when iid_masks=true. - /// \param[in] mask_start Mask start when iid_masks=true. + /// \param[in] mask_start Mask start when iid_masks=false, range: [0, time_length-time_mask_param] (Default: 0). /// \param[in] mask_value Mask value. - explicit TimeMasking(bool iid_masks = false, int64_t time_mask_param = 0, int64_t mask_start = 0, - double mask_value = 0.0); + explicit TimeMasking(bool iid_masks = false, int32_t time_mask_param = 0, int32_t mask_start = 0, + float mask_value = 0.0); /// \brief Destructor. 
~TimeMasking() = default; diff --git a/mindspore/dataset/audio/transforms.py b/mindspore/dataset/audio/transforms.py index 0bbc0191b08..a56bd830161 100644 --- a/mindspore/dataset/audio/transforms.py +++ b/mindspore/dataset/audio/transforms.py @@ -273,9 +273,10 @@ class FrequencyMasking(AudioTensorOperation): Args: iid_masks (bool, optional): Whether to apply different masks to each example (default=false). - frequency_mask_param (int): Maximum possible length of the mask (default=0). + frequency_mask_param (int): Maximum possible length of the mask, range: [0, freq_length] (default=0). Indices uniformly sampled from [0, frequency_mask_param]. - mask_start (int): Mask start when iid_masks=true (default=0). + mask_start (int): Mask start takes effect when iid_masks=false, + range: [0, freq_length-frequency_mask_param] (default=0). mask_value (double): Mask value (default=0.0). Examples: @@ -304,9 +305,10 @@ class TimeMasking(AudioTensorOperation): Args: iid_masks (bool, optional): Whether to apply different masks to each example (default=false). - time_mask_param (int): Maximum possible length of the mask (default=0). + time_mask_param (int): Maximum possible length of the mask, range: [0, time_length] (default=0). Indices uniformly sampled from [0, time_mask_param]. - mask_start (int): Mask start takes effect when iid_masks=true (default=0). + mask_start (int): Mask start takes effect when iid_masks=false, + range: [0, time_length-time_mask_param] (default=0). mask_value (double): Mask value (default=0.0). 
Examples: diff --git a/tests/ut/python/dataset/test_frequency_masking.py b/tests/ut/python/dataset/test_frequency_masking.py index 3893b2f13bb..fc77bca5c69 100644 --- a/tests/ut/python/dataset/test_frequency_masking.py +++ b/tests/ut/python/dataset/test_frequency_masking.py @@ -119,7 +119,8 @@ def test_frequency_masking_invalid_input(): test_invalid_input("invalid mask_start", False, 2, 100, RuntimeError, "MaskAlongAxis: mask_start should be less than the length of chosen dimension.") test_invalid_input("invalid mask_width", False, 200, 2, RuntimeError, - "FrequencyMasking: frequency_mask_param should be less than the length of frequency dimension.") + "FrequencyMasking: frequency_mask_param should be less than or equal to the length of " + + "frequency dimension.") if __name__ == "__main__": diff --git a/tests/ut/python/dataset/test_time_masking.py b/tests/ut/python/dataset/test_time_masking.py index 3aee6340590..1ef77b3eec8 100644 --- a/tests/ut/python/dataset/test_time_masking.py +++ b/tests/ut/python/dataset/test_time_masking.py @@ -119,7 +119,7 @@ def test_time_masking_invalid_input(): test_invalid_input("invalid mask_start", False, 2, 100, RuntimeError, "MaskAlongAxis: mask_start should be less than the length of chosen dimension.") test_invalid_input("invalid mask_width", False, 200, 2, RuntimeError, - "TimeMasking: time_mask_param should be less than the length of time dimension.") + "TimeMasking: time_mask_param should be less than or equal to the length of time dimension.") if __name__ == "__main__":