From 4c3931f5c949d8db430a663bbc5c9fb6a2c03577 Mon Sep 17 00:00:00 2001 From: zhuchuang Date: Thu, 18 Nov 2021 17:18:43 +0800 Subject: [PATCH] [feat] [assistant] [I3CKEE] add new audio operator ComputeDeltas --- mindspore/ccsrc/minddata/dataset/api/audio.cc | 15 ++ .../dataset/audio/kernels/ir/bindings.cc | 12 ++ .../dataset/audio/ir/kernels/CMakeLists.txt | 1 + .../audio/ir/kernels/compute_deltas_ir.cc | 51 +++++++ .../audio/ir/kernels/compute_deltas_ir.h | 52 +++++++ .../dataset/audio/kernels/CMakeLists.txt | 1 + .../dataset/audio/kernels/audio_utils.cc | 142 ++++++++++++++++++ .../dataset/audio/kernels/audio_utils.h | 8 + .../audio/kernels/compute_deltas_op.cc | 44 ++++++ .../dataset/audio/kernels/compute_deltas_op.h | 49 ++++++ .../minddata/dataset/include/dataset/audio.h | 22 +++ .../minddata/dataset/kernels/tensor_op.h | 1 + mindspore/dataset/audio/transforms.py | 57 ++++++- mindspore/dataset/audio/utils.py | 20 +++ mindspore/dataset/audio/validators.py | 17 ++- .../ut/cpp/dataset/c_api_audio_a_to_q_test.cc | 63 ++++++++ tests/ut/cpp/dataset/execute_test.cc | 60 +++++++- .../ut/python/dataset/test_compute_deltas.py | 137 +++++++++++++++++ 18 files changed, 738 insertions(+), 14 deletions(-) create mode 100644 mindspore/ccsrc/minddata/dataset/audio/ir/kernels/compute_deltas_ir.cc create mode 100644 mindspore/ccsrc/minddata/dataset/audio/ir/kernels/compute_deltas_ir.h create mode 100644 mindspore/ccsrc/minddata/dataset/audio/kernels/compute_deltas_op.cc create mode 100644 mindspore/ccsrc/minddata/dataset/audio/kernels/compute_deltas_op.h create mode 100644 tests/ut/python/dataset/test_compute_deltas.py diff --git a/mindspore/ccsrc/minddata/dataset/api/audio.cc b/mindspore/ccsrc/minddata/dataset/api/audio.cc index e6924b438ac..55db0ea23ac 100644 --- a/mindspore/ccsrc/minddata/dataset/api/audio.cc +++ b/mindspore/ccsrc/minddata/dataset/api/audio.cc @@ -25,6 +25,7 @@ #include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h" #include "minddata/dataset/audio/ir/kernels/biquad_ir.h" #include "minddata/dataset/audio/ir/kernels/complex_norm_ir.h" +#include "minddata/dataset/audio/ir/kernels/compute_deltas_ir.h" #include "minddata/dataset/audio/ir/kernels/contrast_ir.h" #include "minddata/dataset/audio/ir/kernels/db_to_amplitude_ir.h" #include "minddata/dataset/audio/ir/kernels/dc_shift_ir.h" @@ -187,6 +188,20 @@ ComplexNorm::ComplexNorm(float power) : data_(std::make_shared(power)) {} std::shared_ptr ComplexNorm::Parse() { return std::make_shared(data_->power_); } +// ComputeDeltas Transform Operation. +struct ComputeDeltas::Data { + Data(int32_t win_length, BorderType pad_mode) : win_length_(win_length), pad_mode_(pad_mode) {} + int32_t win_length_; + BorderType pad_mode_; +}; + +ComputeDeltas::ComputeDeltas(int32_t win_length, BorderType pad_mode) + : data_(std::make_shared(win_length, pad_mode)) {} + +std::shared_ptr ComputeDeltas::Parse() { + return std::make_shared(data_->win_length_, data_->pad_mode_); +} + // Contrast Transform Operation. struct Contrast::Data { explicit Data(float enhancement_amount) : enhancement_amount_(enhancement_amount) {} diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc index a6dd2cd898e..1dc12452346 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc @@ -29,6 +29,7 @@ #include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h" #include "minddata/dataset/audio/ir/kernels/biquad_ir.h" #include "minddata/dataset/audio/ir/kernels/complex_norm_ir.h" +#include "minddata/dataset/audio/ir/kernels/compute_deltas_ir.h" #include "minddata/dataset/audio/ir/kernels/contrast_ir.h" #include "minddata/dataset/audio/ir/kernels/db_to_amplitude_ir.h" #include "minddata/dataset/audio/ir/kernels/dc_shift_ir.h" @@ -161,6 +162,17 @@ PYBIND_REGISTER( })); })); +PYBIND_REGISTER( + ComputeDeltasOperation, 1, ([](const py::module *m) { + (void)py::class_>( + *m, "ComputeDeltasOperation") + .def(py::init([](int32_t win_length, BorderType pad_mode) { + auto compute_deltas = std::make_shared(win_length, pad_mode); + THROW_IF_ERROR(compute_deltas->ValidateParams()); + return compute_deltas; + })); + })); + PYBIND_REGISTER(ContrastOperation, 1, ([](const py::module *m) { (void) py::class_>( diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt index 5c38e720d66..b5c9ef29988 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt @@ -11,6 +11,7 @@ add_library(audio-ir-kernels OBJECT bass_biquad_ir.cc biquad_ir.cc complex_norm_ir.cc + compute_deltas_ir.cc contrast_ir.cc db_to_amplitude_ir.cc dc_shift_ir.cc diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/compute_deltas_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/compute_deltas_ir.cc new file mode 100644 index 00000000000..e41765120b0 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/compute_deltas_ir.cc @@ -0,0 +1,51 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "minddata/dataset/audio/ir/kernels/compute_deltas_ir.h" + +#include "minddata/dataset/audio/ir/validators.h" +#include "minddata/dataset/audio/kernels/compute_deltas_op.h" + +namespace mindspore { +namespace dataset { +namespace audio { +ComputeDeltasOperation::ComputeDeltasOperation(int32_t win_length, BorderType pad_mode) + : win_length_(win_length), pad_mode_(pad_mode) {} + +std::shared_ptr ComputeDeltasOperation::Build() { + return std::make_shared(win_length_, pad_mode_); +} + +Status ComputeDeltasOperation::to_json(nlohmann::json *out_json) { + nlohmann::json args; + args["win_length"] = win_length_; + args["pad_mode"] = pad_mode_; + *out_json = args; + return Status::OK(); +} + +Status ComputeDeltasOperation::ValidateParams() { + RETURN_IF_NOT_OK(ValidateScalar("ComputeDeltas", "win_length", win_length_, {3}, false)); + if (pad_mode_ != BorderType::kConstant && pad_mode_ != BorderType::kEdge && pad_mode_ != BorderType::kReflect && + pad_mode_ != BorderType::kSymmetric) { + std::string err_msg = "ComputeDeltas: invalid BorderType, please check input value of enum."; + LOG_AND_RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + return Status::OK(); +} +} // namespace audio +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/compute_deltas_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/compute_deltas_ir.h new file mode 100644 index 00000000000..3a46ae4654d --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/compute_deltas_ir.h @@ -0,0 +1,52 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_COMPUTE_DELTAS_IR_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_COMPUTE_DELTAS_IR_H_ + +#include +#include +#include + +#include "include/api/status.h" +#include "minddata/dataset/kernels/ir/tensor_operation.h" + +namespace mindspore { +namespace dataset { +namespace audio { +constexpr char kComputeDeltasOperation[] = "ComputeDeltas"; + +class ComputeDeltasOperation : public TensorOperation { + public: + ComputeDeltasOperation(int32_t win_length, BorderType pad_mode); + + ~ComputeDeltasOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kComputeDeltasOperation; } + + Status to_json(nlohmann::json *out_json) override; + + private: + int32_t win_length_; + BorderType pad_mode_; +}; +} // namespace audio +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_COMPUTE_DELTAS_IR_H_ diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt index e447343b798..4ca1e0a39aa 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt @@ -12,6 +12,7 @@ add_library(audio-kernels OBJECT bass_biquad_op.cc biquad_op.cc complex_norm_op.cc + compute_deltas_op.cc contrast_op.cc db_to_amplitude_op.cc dc_shift_op.cc diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc index 2f0530666d4..ce43bcb4e4b 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc @@ -1063,5 +1063,147 @@ Status SlidingWindowCmn(const std::shared_ptr &input, std::shared_ptr +Status Pad(const std::shared_ptr &input, std::shared_ptr *output, int32_t pad_left, int32_t pad_right, + BorderType padding_mode, T value = 0) { + CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 2, "Pad: input tensor is not in shape of <..., time>."); + CHECK_FAIL_RETURN_UNEXPECTED( + input->type().IsNumeric(), + "Pad: input tensor type should be int, float or double, but got: " + input->type().ToString()); + CHECK_FAIL_RETURN_UNEXPECTED(pad_left >= 0 && pad_right >= 0, + "Pad: left and right padding values must be non negative, but got pad_left: " + + std::to_string(pad_left) + " and pad_right: " + std::to_string(pad_right)); + TensorShape input_shape = input->shape(); + int32_t wave_length = input_shape[-1]; + int32_t num_wavs = static_cast(input->Size() / wave_length); + TensorShape to_shape = TensorShape({num_wavs, wave_length}); + RETURN_IF_NOT_OK(input->Reshape(to_shape)); + int32_t pad_length = wave_length + pad_left + pad_right; + TensorShape new_shape = TensorShape({num_wavs, pad_length}); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), output)); + using MatrixXT = Eigen::Matrix; + using Eigen::Map; + constexpr int pad_mul = 2; + T *input_data = reinterpret_cast(const_cast(input->GetBuffer())); + T *output_data = reinterpret_cast(const_cast((*output)->GetBuffer())); + auto input_map = Map(input_data, num_wavs, wave_length); + auto output_map = Map(output_data, num_wavs, pad_length); + output_map.block(0, pad_left, num_wavs, wave_length) = input_map; + if (padding_mode == BorderType::kConstant) { + output_map.block(0, 0, num_wavs, pad_left).setConstant(value); + output_map.block(0, pad_left + wave_length, num_wavs, pad_right).setConstant(value); + } else if (padding_mode == BorderType::kEdge) { + output_map.block(0, 0, num_wavs, pad_left).colwise() = input_map.col(0); + output_map.block(0, pad_left + wave_length, num_wavs, pad_right).colwise() = input_map.col(wave_length - 1); + } else if (padding_mode == BorderType::kReflect) { + // First, deal with the pad operation on the right. + int32_t current_pad = wave_length - 1; + while (pad_right >= current_pad) { + // current_pad: the length of pad required for current loop. + // pad_right: the length of the remaining pad on the right. + output_map.block(0, pad_left + current_pad + 1, num_wavs, current_pad) = + output_map.block(0, pad_left, num_wavs, current_pad).rowwise().reverse(); + pad_right -= current_pad; + current_pad += current_pad; + } + output_map.block(0, pad_length - pad_right, num_wavs, pad_right) = + output_map.block(0, pad_length - pad_right * pad_mul - 1, num_wavs, pad_right).rowwise().reverse(); + // Next, deal with the pad operation on the left. + current_pad = wave_length - 1; + while (pad_left >= current_pad) { + // current_pad: the length of pad required for current loop. + // pad_left: the length of the remaining pad on the left. + output_map.block(0, pad_left - current_pad, num_wavs, current_pad) = + output_map.block(0, pad_left + 1, num_wavs, current_pad).rowwise().reverse(); + pad_left -= current_pad; + current_pad += current_pad; + } + output_map.block(0, 0, num_wavs, pad_left) = + output_map.block(0, pad_left + 1, num_wavs, pad_left).rowwise().reverse(); + } else if (padding_mode == BorderType::kSymmetric) { + // First, deal with the pad operation on the right. + int32_t current_pad = wave_length; + while (pad_right >= current_pad) { + // current_pad: the length of pad required for current loop. + // pad_right: the length of the remaining pad on the right. + output_map.block(0, pad_left + current_pad, num_wavs, current_pad) = + output_map.block(0, pad_left, num_wavs, current_pad).rowwise().reverse(); + pad_right -= current_pad; + current_pad += current_pad; + } + output_map.block(0, pad_length - pad_right, num_wavs, pad_right) = + output_map.block(0, pad_length - pad_right * pad_mul, num_wavs, pad_right).rowwise().reverse(); + // Next, deal with the pad operation on the left. + current_pad = wave_length; + while (pad_left >= current_pad) { + // current_pad: the length of pad required for current loop. + // pad_left: the length of the remaining pad on the left. + output_map.block(0, pad_left - current_pad, num_wavs, current_pad) = + output_map.block(0, pad_left, num_wavs, current_pad).rowwise().reverse(); + pad_left -= current_pad; + current_pad += current_pad; + } + output_map.block(0, 0, num_wavs, pad_left) = output_map.block(0, pad_left, num_wavs, pad_left).rowwise().reverse(); + } else { + RETURN_STATUS_UNEXPECTED("Pad: unsupported border type."); + } + std::vector shape_vec = input_shape.AsVector(); + shape_vec[shape_vec.size() - 1] = static_cast(pad_length); + TensorShape output_shape(shape_vec); + RETURN_IF_NOT_OK((*output)->Reshape(output_shape)); + return Status::OK(); +} + +template +Status ComputeDeltasImpl(const std::shared_ptr &input, std::shared_ptr *output, int all_freqs, + int n_frame, int n) { + using VectorXT = Eigen::Matrix; + using MatrixXT = Eigen::Matrix; + using Eigen::Map; + int32_t denom = n * (n + 1) * (n * 2 + 1) / 3; + // twice sum of integer squared + VectorXT kernel = VectorXT::LinSpaced(2 * n + 1, -n, n); // 2n+1 + T *input_data = reinterpret_cast(const_cast(input->GetBuffer())); // [all_freq,n_fram+2n] + RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{all_freqs, n_frame}, input->type(), output)); + T *output_data = reinterpret_cast(const_cast((*output)->GetBuffer())); + for (int freq = 0; freq < all_freqs; ++freq) { // conv with im2col + auto input_map = Map>(input_data + freq * (n_frame + 2 * n), n_frame, + 2 * n + 1); // n_frmae,2n+1 + Map(output_data + freq * n_frame, n_frame) = (input_map * kernel).array() / T(denom); + } + return Status::OK(); +} + +Status ComputeDeltas(const std::shared_ptr &input, std::shared_ptr *output, int32_t win_length, + const BorderType &mode) { + constexpr int min_shape_dim = 2; + auto raw_shape = input->shape(); + CHECK_FAIL_RETURN_UNEXPECTED(raw_shape.Size() >= min_shape_dim, + "ComputeDeltas: input tensor is not in shape of <..., freq, time>."); + CHECK_FAIL_RETURN_UNEXPECTED( + input->type().IsNumeric(), + "ComputeDeltas: input tensor type should be int, float or double, but got: " + input->type().ToString()); + + // reshape Tensor from <..., freq, time> to <-1, time> + int32_t n_frames = raw_shape[-1]; + int32_t all_freqs = raw_shape.NumOfElements() / n_frames; + RETURN_IF_NOT_OK(input->Reshape(TensorShape{all_freqs, n_frames})); + + int32_t n = (win_length - 1) / 2; + + std::shared_ptr specgram_local_pad; + if (input->type() == DataType(DataType::DE_FLOAT64)) { + RETURN_IF_NOT_OK(Pad(input, &specgram_local_pad, n, n, mode)); + RETURN_IF_NOT_OK(ComputeDeltasImpl(specgram_local_pad, output, all_freqs, n_frames, n)); + } else { + std::shared_ptr float_tensor; + RETURN_IF_NOT_OK(TypeCast(input, &float_tensor, DataType(DataType::DE_FLOAT32))); + RETURN_IF_NOT_OK(Pad(float_tensor, &specgram_local_pad, n, n, mode)); + RETURN_IF_NOT_OK(ComputeDeltasImpl(specgram_local_pad, output, all_freqs, n_frames, n)); + } + RETURN_IF_NOT_OK((*output)->Reshape(raw_shape)); + return Status::OK(); +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h index 997b7fff449..1cd206ef0fe 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h @@ -1030,6 +1030,14 @@ Status ReadWaveFile(const std::string &wav_file_dir, std::vector *wavefor /// \return Status code. Status SlidingWindowCmn(const std::shared_ptr &input, std::shared_ptr *output, int32_t cmn_window, int32_t min_cmn_window, bool center, bool norm_vars); +/// \brief Compute delta coefficients of a tensor, usually a spectrogram. +/// \param input: Tensor of shape <...,freq,time>. +/// \param output: Tensor of shape <...,freq,time>. +/// \param win_length: The window length used for computing delta. +/// \param mode: Padding mode. +/// \return Status code. +Status ComputeDeltas(const std::shared_ptr &input, std::shared_ptr *output, int32_t win_length, + const BorderType &mode); } // namespace dataset } // namespace mindspore #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/compute_deltas_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/compute_deltas_op.cc new file mode 100644 index 00000000000..cf16d448006 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/compute_deltas_op.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "minddata/dataset/audio/kernels/compute_deltas_op.h" + +#include "minddata/dataset/audio/kernels/audio_utils.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { +ComputeDeltasOp::ComputeDeltasOp(int32_t win_length, BorderType mode) : win_length_(win_length), mode_(mode) {} + +Status ComputeDeltasOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { + IO_CHECK(input, output); + return ComputeDeltas(input, output, win_length_, mode_); +} + +Status ComputeDeltasOp::OutputType(const std::vector &inputs, std::vector &outputs) { + RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs)); + if (!inputs[0].IsNumeric()) { + RETURN_STATUS_UNEXPECTED("ComputeDeltas: input tensor type should be int, float or double, but got: " + + inputs[0].ToString()); + } else if (inputs[0] == DataType(DataType::DE_FLOAT64)) { + outputs[0] = DataType(DataType::DE_FLOAT64); + } else { + outputs[0] = DataType(DataType::DE_FLOAT32); + } + return Status::OK(); +} +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/compute_deltas_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/compute_deltas_op.h new file mode 100644 index 00000000000..d17463807ee --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/compute_deltas_op.h @@ -0,0 +1,49 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_COMPUTE_DELTAS_OP_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_COMPUTE_DELTAS_OP_H_ + +#include +#include +#include + +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/include/dataset/constants.h" +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { +class ComputeDeltasOp : public TensorOp { + public: + explicit ComputeDeltasOp(int32_t win_length = 5, BorderType mode = BorderType::kEdge); + + Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + + Status OutputType(const std::vector &inputs, std::vector &outputs) override; + + std::string Name() const override { return kComputeDeltasOp; }; + + ~ComputeDeltasOp() = default; + + private: + int32_t win_length_; + BorderType mode_; +}; +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_COMPUTE_DELTAS_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h index 3f078752f1c..6b11f39c265 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h @@ -236,6 +236,28 @@ class ComplexNorm final : public TensorTransform { std::shared_ptr data_; }; +/// \brief ComputeDeltas Transform. +/// \note Compute delta coefficients of a spectrogram. +class ComputeDeltas final : public TensorTransform { + public: + /// \brief Construct a new Compute Deltas object. + /// \param[in] win_length The window length used for computing delta, must be no less than 3 (Default: 5). + /// \param[in] pad_mode Mode parameter passed to padding (Default: BorderType::kEdge). + explicit ComputeDeltas(int32_t win_length = 5, BorderType pad_mode = BorderType::kEdge); + + /// \brief Destructor. + ~ComputeDeltas() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + /// \brief Apply contrast effect. class Contrast final : public TensorTransform { public: diff --git a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h index 9597fc2efb1..726f073a89a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h @@ -153,6 +153,7 @@ constexpr char kBandrejectBiquadOp[] = "BandrejectBiquadOp"; constexpr char kBassBiquadOp[] = "BassBiquadOp"; constexpr char kBiquadOp[] = "BiquadOp"; constexpr char kComplexNormOp[] = "ComplexNormOp"; +constexpr char kComputeDeltasOp[] = "ComputeDeltasOp"; constexpr char kContrastOp[] = "ContrastOp"; constexpr char kDBToAmplitudeOp[] = " DBToAmplitudeOp"; constexpr char kDCShiftOp[] = "DCShiftOp"; diff --git a/mindspore/dataset/audio/transforms.py b/mindspore/dataset/audio/transforms.py index 88ed3e290f5..dea303e176c 100644 --- a/mindspore/dataset/audio/transforms.py +++ b/mindspore/dataset/audio/transforms.py @@ -23,13 +23,13 @@ import numpy as np import mindspore._c_dataengine as cde from ..transforms.c_transforms import TensorOperation -from .utils import FadeShape, GainType, Interpolation, Modulation, ScaleType +from .utils import BorderType, FadeShape, GainType, Interpolation, Modulation, ScaleType from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \ - check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_contrast, \ - check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, check_equalizer_biquad, \ - check_fade, check_flanger, check_highpass_biquad, check_lfilter, check_lowpass_biquad, check_magphase, \ - check_masking, check_mu_law_coding, check_overdrive, check_phaser, check_riaa_biquad, check_sliding_window_cmn, \ - check_time_stretch, check_treble_biquad, check_vol + check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \ + check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \ + check_equalizer_biquad, check_fade, check_flanger, check_highpass_biquad, check_lfilter, check_lowpass_biquad, \ + check_magphase, check_masking, check_mu_law_coding, check_overdrive, check_phaser, check_riaa_biquad, \ + check_sliding_window_cmn, check_time_stretch, check_treble_biquad, check_vol class AudioTensorOperation(TensorOperation): @@ -305,6 +305,51 @@ class ComplexNorm(AudioTensorOperation): return cde.ComplexNormOperation(self.power) +DE_C_BORDER_TYPE = { + BorderType.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT, + BorderType.EDGE: cde.BorderType.DE_BORDER_EDGE, + BorderType.REFLECT: cde.BorderType.DE_BORDER_REFLECT, + BorderType.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC, +} + + +class ComputeDeltas(AudioTensorOperation): + """ + Compute delta coefficients of a spectrogram. + + Args: + win_length (int): The window length used for computing delta, must be no less than 3 (default=5). + mode (BorderType): Mode parameter passed to padding (default=BorderType.EDGE).It can be any of + [BorderType.CONSTANT, BorderType.EDGE, BorderType.REFLECT, BordBorderTypeer.SYMMETRIC]. + + - BorderType.CONSTANT, means it fills the border with constant values. + + - BorderType.EDGE, means it pads with the last value on the edge. + + - BorderType.REFLECT, means it reflects the values on the edge omitting the last + value of edge. + + - BorderType.SYMMETRIC, means it reflects the values on the edge repeating the last + value of edge. + + Examples: + >>> import numpy as np + >>> + >>> waveform = np.random.random([1, 400//2+1, 30]) + >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"]) + >>> transforms = [audio.ComputeDeltas(win_length=7, pad_mode = BorderType.EDGE)] + >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) + """ + + @check_compute_deltas + def __init__(self, win_length=5, pad_mode=BorderType.EDGE): + self.win_len = win_length + self.pad_mode = pad_mode + + def parse(self): + return cde.ComputeDeltasOperation(self.win_len, DE_C_BORDER_TYPE[self.pad_mode]) + + class Contrast(AudioTensorOperation): """ Apply contrast effect. Similar to SoX implementation. diff --git a/mindspore/dataset/audio/utils.py b/mindspore/dataset/audio/utils.py index 93c6d7375a6..b5271d1391e 100644 --- a/mindspore/dataset/audio/utils.py +++ b/mindspore/dataset/audio/utils.py @@ -141,3 +141,23 @@ def CreateDct(n_mfcc, n_mels, norm=NormMode.NONE): if n_mels <= 0: raise ValueError("n_mels must be greater than 0, but got {0}.".format(n_mels)) return cde.CreateDct(n_mfcc, n_mels, DE_C_NORMMODE_TYPE[norm]).as_array() + + +class BorderType(str, Enum): + """ + Padding Mode, BorderType Type. + + Possible enumeration values are: BorderType.CONSTANT, BorderType.EDGE, BorderType.REFLECT, BorderType.SYMMETRIC. + + - BorderType.CONSTANT: means it fills the border with constant values. + - BorderType.EDGE: means it pads with the last value on the edge. + - BorderType.REFLECT: means it reflects the values on the edge omitting the last value of edge. + - BorderType.SYMMETRIC: means it reflects the values on the edge repeating the last value of edge. + + Note: This class derived from class str to support json serializable. + """ + CONSTANT: str = "constant" + EDGE: str = "edge" + REFLECT: str = "reflect" + SYMMETRIC: str = "symmetric" + \ No newline at end of file diff --git a/mindspore/dataset/audio/validators.py b/mindspore/dataset/audio/validators.py index 22524405512..3102bdcb345 100644 --- a/mindspore/dataset/audio/validators.py +++ b/mindspore/dataset/audio/validators.py @@ -20,8 +20,8 @@ from functools import wraps from mindspore.dataset.core.validator_helpers import check_float32, check_float32_not_zero, check_int32,\ check_int32_not_zero, check_list_same_size, check_non_negative_float32, check_non_negative_int32, \ - check_pos_float32, check_pos_int32, check_value, parse_user_args, type_check -from .utils import FadeShape, GainType, Interpolation, Modulation, ScaleType + check_pos_float32, check_pos_int32, check_value, INT32_MAX, parse_user_args, type_check +from .utils import BorderType, FadeShape, GainType, Interpolation, Modulation, ScaleType def check_amplitude_to_db(method): @@ -576,3 +576,16 @@ def check_sliding_window_cmn(method): return method(self, *args, **kwargs) return new_method + + +def check_compute_deltas(method): + """Wrapper method to check the parameter of ComputeDeltas.""" + @wraps(method) + def new_method(self, *args, **kwargs): + [win_length, pad_mode], _ = parse_user_args(method, *args, **kwargs) + type_check(pad_mode, (BorderType,), "pad_mode") + type_check(win_length, (int,), "win_length") + check_value(win_length, (3, INT32_MAX), "win_length") + return method(self, *args, **kwargs) + + return new_method diff --git a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc index 795d106353b..fe59d63ed17 100644 --- a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc +++ b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc @@ -1957,3 +1957,66 @@ TEST_F(MindDataTestPipeline, TestDBToAmplitudePipeline) { iter->Stop(); } + +/// Feature: ComputeDeltas +/// Description: test basic function of ComputeDeltas +/// Expectation: get correct number of data +TEST_F(MindDataTestPipeline, TestComputeDeltas) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestComputeDeltas."; + // Original waveform + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto compute_deltas_op = audio::ComputeDeltas(); + + ds = ds->Map({compute_deltas_op}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(ds, nullptr); + + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector expected = {2, 200}; + + int i = 0; + while (row.size() != 0) { + auto col = row["inputData"]; + ASSERT_EQ(col.Shape(), expected); + ASSERT_EQ(col.Shape().size(), 2); + ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + EXPECT_EQ(i, 50); + + iter->Stop(); +} + +/// Feature: ComputeDeltas +/// Description: test wrong input args of ComputeDeltas +/// Expectation: get nullptr of iterator +TEST_F(MindDataTestPipeline, TestComputeDeltasWrongArgs) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestComputeDeltasWrongArgs."; + // Original waveform + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + auto compute_deltas_op = audio::ComputeDeltas(2, mindspore::dataset::BorderType::kEdge); + ds = ds->Map({compute_deltas_op}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure + EXPECT_EQ(iter, nullptr); +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/execute_test.cc b/tests/ut/cpp/dataset/execute_test.cc index 21727ccb9ea..16d13f26af9 100644 --- a/tests/ut/cpp/dataset/execute_test.cc +++ b/tests/ut/cpp/dataset/execute_test.cc @@ -177,6 +177,57 @@ TEST_F(MindDataTestExecute, TestComposeTransforms) { EXPECT_EQ(30, image.Shape()[1]); } +/// Feature: ComputeDeltas +/// Description: test basic function of ComputeDeltas +/// Expectation: get correct number of data +TEST_F(MindDataTestExecute, TestComputeDeltas) { + MS_LOG(INFO) << "Doing MindDataTestExecute-TestComputeDeltas."; + std::shared_ptr input_tensor_; + + int win_length = 5; + + // create tensor + TensorShape s = TensorShape({2, 15, 7}); + // init input vec + std::vector input_vec(s.NumOfElements()); + for (int ind = 0; ind < input_vec.size(); ind++) { + input_vec[ind] = std::rand() % (1000) / (1000.0f); + } + ASSERT_OK(Tensor::CreateFromVector(input_vec, s, &input_tensor_)); + auto input_ms = mindspore::MSTensor(std::make_shared(input_tensor_)); + std::shared_ptr compute_deltas_op = std::make_shared(win_length); + + // apply compute_deltas + mindspore::dataset::Execute Transform({compute_deltas_op}); + Status status = Transform(input_ms, &input_ms); + EXPECT_TRUE(status.IsOk()); +} + +/// Feature: ComputeDeltas +/// Description: test wrong input args of ComputeDeltas +/// Expectation: get nullptr of iterator +TEST_F(MindDataTestExecute, TestComputeDeltasWrongArgs) { + MS_LOG(INFO) << "Doing MindDataTestExecute-TestComputeDeltasWrongArgs."; + std::shared_ptr input_tensor_; + // win_length is less than minimum of 3 + int win_length = 2; + + // create tensor + TensorShape s = TensorShape({2, 15, 7}); + // init input vec + std::vector input_vec(s.NumOfElements()); + for (int ind = 0; ind < input_vec.size(); ind++) { + input_vec[ind] = std::rand() % (1000) / (1000.0f); + } + ASSERT_OK(Tensor::CreateFromVector(input_vec, s, &input_tensor_)); + auto input_ms = mindspore::MSTensor(std::make_shared(input_tensor_)); + + std::shared_ptr compute_deltas_op = std::make_shared(win_length); + mindspore::dataset::Execute Transform({compute_deltas_op}); + Status status = Transform(input_ms, &input_ms); + EXPECT_FALSE(status.IsOk()); +} + TEST_F(MindDataTestExecute, TestCrop) { MS_LOG(INFO) << "Doing MindDataTestExecute-TestCrop."; @@ -937,12 +988,9 @@ TEST_F(MindDataTestExecute, TestOverdriveBasicWithEager) { /// Expectation: throw exception correctly TEST_F(MindDataTestExecute, TestOverdriveWrongArgWithEager) { MS_LOG(INFO) << "Doing MindDataTestExecute-TestOverdriveWrongArgWithEager"; - std::vector labels = { - 0.271, 1.634, 9.246, 0.108, - 1.138, 1.156, 3.394, 1.55, - 3.614, 1.8402, 0.718, 4.599, - 5.64, 2.510620117187500000e-02, 1.38, 5.825, - 4.1906, 5.28, 1.052, 9.36}; + std::vector labels = {0.271, 1.634, 9.246, 0.108, 1.138, 1.156, 3.394, + 1.55, 3.614, 1.8402, 0.718, 4.599, 5.64, 2.510620117187500000e-02, + 1.38, 5.825, 4.1906, 5.28, 1.052, 9.36}; std::shared_ptr input; ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({4, 5}), &input)); diff --git a/tests/ut/python/dataset/test_compute_deltas.py b/tests/ut/python/dataset/test_compute_deltas.py new file mode 100644 index 00000000000..9ba428fc7ac --- /dev/null +++ b/tests/ut/python/dataset/test_compute_deltas.py @@ -0,0 +1,137 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +""" +Testing ComputeDeltas op in DE +""" +import numpy as np +import pytest + +import mindspore.dataset as ds +import mindspore.dataset.audio.transforms as c_audio +from mindspore import log as logger +from mindspore.dataset.audio.utils import BorderType + +CHANNEL = 1 +FREQ = 20 +TIME = 15 + + +def gen(shape): + np.random.seed(0) + data = np.random.random(shape) + yield (np.array(data, dtype=np.float32),) + + +def count_unequal_element(data_expected, data_me, rtol, atol): + """ Precision calculation func """ + assert data_expected.shape == data_me.shape + total_count = len(data_expected.flatten()) + error = np.abs(data_expected - data_me) + greater = np.greater(error, atol + np.abs(data_expected) * rtol) + loss_count = np.count_nonzero(greater) + assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( + data_expected[greater], data_me[greater], error[greater]) + + +def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True): + """ Precision calculation formula """ + if np.any(np.isnan(data_expected)): + assert np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan) + elif not np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan): + count_unequal_element(data_expected, data_me, rtol, atol) + + +def test_compute_deltas_eager(): + """ + Feature: test the basic function in eager mode. + Description: mindspore eager mode normal testcase:compute_deltas op. + Expectation: compile done without error. + """ + + logger.info("check compute_deltas op output") + ndarr_in = np.array([[[0.08746047, -0.33246294, 0.5240939, 0.6064913, -0.70366], + [1.1420338, 0.50532603, 0.73435473, -0.83435977, -1.0607501], + [-1.4298731, -0.86117035, -0.7773941, -0.60023546, 1.1807907], + [0.4973711, 0.5299286, 0.818514, 0.7559297, -0.3418539], + [-0.2824797, 0.30402678, 0.7848569, -0.4135576, 0.19522846], + [-0.11636204, -0.4780833, 1.2691815, 0.9824286, 0.029275], + [-1.2611166, -1.1957082, 0.26212585, 0.35354254, 0.3609486]]]).astype(np.float32) + + out_expect = np.array([[[0.0453, 0.1475, -0.0643, -0.1970, -0.3766], + [-0.1452, -0.4360, -0.5745, -0.4927, -0.3817], + [0.1874, 0.2312, 0.5482, 0.6042, 0.5697], + [0.0675, 0.0838, -0.1452, -0.2904, -0.3419], + [0.2721, 0.0805, 0.0238, -0.0807, -0.0570], + [0.2409, 0.3583, 0.1752, -0.0225, -0.3433], + [0.3112, 0.4753, 0.4793, 0.3212, 0.0205]]]).astype(np.float32) + + compute_deltas_op = c_audio.ComputeDeltas() + out_mindspore = compute_deltas_op(ndarr_in) + + allclose_nparray(out_mindspore, out_expect, 0.0001, 0.0001) + + +def test_compute_deltas_pipeline(): + """ + Feature: test the basic function in pipeline mode. + Description: mindspore pipeline mode normal testcase:compute_deltas op. + Expectation: compile done without error. + """ + + logger.info("test ComputeDeltas op with default value") + generator = gen([CHANNEL, FREQ, TIME]) + + data1 = ds.GeneratorDataset( + source=generator, column_names=["multi_dimensional_data"] + ) + + transforms = [c_audio.ComputeDeltas()] + data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"]) + + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): + out_put = item["multi_dimensional_data"] + assert out_put.shape == (CHANNEL, FREQ, TIME) + + +def test_compute_deltas_invalid_input(): + """ + Feature: test the validate function with invalid parameters. + Description: mindspore invalid parameters testcase:compute_deltas op. + Expectation: compile done without error. + """ + def test_invalid_input(test_name, win_length, pad_mode, error, error_msg): + logger.info("Test ComputeDeltas with bad input: {0}".format(test_name)) + with pytest.raises(error) as error_info: + c_audio.ComputeDeltas(win_length=win_length, pad_mode=pad_mode) + assert error_msg in str(error_info.value) + + test_invalid_input( + "invalid win_length parameter value", "test", BorderType.EDGE, TypeError, + "Argument win_length with value test is not of type [], but got .", + ) + test_invalid_input( + "invalid win_length parameter value", 2, BorderType.EDGE, ValueError, + "Input win_length is not within the required interval of [3, 2147483647]", + ) + test_invalid_input( + "invalid pad_mode parameter value", 5, 2, TypeError, + "Argument pad_mode with value 2 is not of type [], but got .", + ) + + +if __name__ == "__main__": + test_compute_deltas_eager() + test_compute_deltas_pipeline() + test_compute_deltas_invalid_input()