!33270 [assistant][InverseMelScale]

Merge pull request !33270 from chenchen/InverseMelScale
2022-04-26 08:55:58 +00:00 · 2022-04-26 08:55:58 +00:00 · fcb0319747
parent 359cf60144 cd8395f352
commit fcb0319747
27 changed files with 1057 additions and 29 deletions
--- a/mindspore/ccsrc/minddata/dataset/api/audio.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/audio.cc
@ -39,6 +39,7 @@
 #include "minddata/dataset/audio/ir/kernels/gain_ir.h"
 #include "minddata/dataset/audio/ir/kernels/griffin_lim_ir.h"
 #include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.h"
 #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
 #include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h"
 #include "minddata/dataset/audio/ir/kernels/magphase_ir.h"
@ -464,6 +465,47 @@ std::shared_ptr<TensorOperation> HighpassBiquad::Parse() {
  return std::make_shared<HighpassBiquadOperation>(data_->sample_rate_, data_->cutoff_freq_, data_->Q_);
 }

+// InverseMelScale Transform Operation.
+struct InverseMelScale::Data {
+  // Stores every argument of the public constructor unchanged; nothing is validated at this point.
+  Data(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
+       float tolerance_loss, float tolerance_change, const std::map<std::string, float> &sgdargs, NormType norm,
+       MelType mel_type)
+      : n_stft_{n_stft},
+        n_mels_{n_mels},
+        sample_rate_{sample_rate},
+        f_min_{f_min},
+        f_max_{f_max},
+        max_iter_{max_iter},
+        tolerance_loss_{tolerance_loss},
+        tolerance_change_{tolerance_change},
+        sgdargs_{sgdargs},
+        norm_{norm},
+        mel_type_{mel_type} {}
+
+  // Filter-bank geometry.
+  int32_t n_stft_;
+  int32_t n_mels_;
+  int32_t sample_rate_;
+  float f_min_;
+  float f_max_;
+
+  // Optimisation loop settings.
+  int32_t max_iter_;
+  float tolerance_loss_;
+  float tolerance_change_;
+  std::map<std::string, float> sgdargs_;  // own copy of the caller's optimizer options
+
+  // Mel filter-bank flavour.
+  NormType norm_;
+  MelType mel_type_;
+};
+
+// Packs all constructor arguments into a shared Data instance; Parse() later reads them back from data_.
+InverseMelScale::InverseMelScale(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max,
+                                 int32_t max_iter, float tolerance_loss, float tolerance_change,
+                                 const std::map<std::string, float> &sgdargs, NormType norm, MelType mel_type)
+    : data_(std::make_shared<Data>(n_stft, n_mels, sample_rate, f_min, f_max,  // filter-bank geometry
+                                   max_iter, tolerance_loss, tolerance_change, sgdargs,  // optimisation settings
+                                   norm, mel_type)) {}
+
+// Build the IR node for this transform from the parameters captured at construction time.
+std::shared_ptr<TensorOperation> InverseMelScale::Parse() {
+  const auto &d = *data_;
+  return std::make_shared<InverseMelScaleOperation>(d.n_stft_, d.n_mels_, d.sample_rate_, d.f_min_, d.f_max_,
+                                                    d.max_iter_, d.tolerance_loss_, d.tolerance_change_, d.sgdargs_,
+                                                    d.norm_, d.mel_type_);
+}
+
 // LFilter Transform Operation.
 struct LFilter::Data {
  Data(const std::vector<float> &a_coeffs, const std::vector<float> &b_coeffs, bool clamp)
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc
@ -43,6 +43,7 @@
 #include "minddata/dataset/audio/ir/kernels/gain_ir.h"
 #include "minddata/dataset/audio/ir/kernels/griffin_lim_ir.h"
 #include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.h"
 #include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
 #include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h"
 #include "minddata/dataset/audio/ir/kernels/magphase_ir.h"
@ -359,6 +360,20 @@ PYBIND_REGISTER(
      }));
  }));

+// Python binding for audio::InverseMelScaleOperation, registered under the name "InverseMelScaleOperation".
+// The bound constructor converts the Python `sgdargs` dict with toStringFloatMap(), constructs the operation and
+// runs ValidateParams() on it before handing it back; THROW_IF_ERROR reports a validation failure
+// (presumably as a Python exception - confirm against the macro definition).
+PYBIND_REGISTER(InverseMelScaleOperation, 1, ([](const py::module *m) {
+                  (void)py::class_<audio::InverseMelScaleOperation, TensorOperation,
+                                   std::shared_ptr<audio::InverseMelScaleOperation>>(*m, "InverseMelScaleOperation")
+                    .def(py::init([](int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max,
+                                     int32_t max_iter, float tolerance_loss, float tolerance_change,
+                                     const py::dict &sgdargs, NormType norm, MelType mel_type) {
+                      auto inverse_mel_scale = std::make_shared<audio::InverseMelScaleOperation>(
+                        n_stft, n_mels, sample_rate, f_min, f_max, max_iter, tolerance_loss, tolerance_change,
+                        toStringFloatMap(sgdargs), norm, mel_type);
+                      THROW_IF_ERROR(inverse_mel_scale->ValidateParams());
+                      return inverse_mel_scale;
+                    }));
+                }));
+
 PYBIND_REGISTER(LFilterOperation, 1, ([](const py::module *m) {
                  (void)py::class_<audio::LFilterOperation, TensorOperation, std::shared_ptr<audio::LFilterOperation>>(
                    *m, "LFilterOperation")
--- a/mindspore/ccsrc/minddata/dataset/api/python/pybind_conversion.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/pybind_conversion.cc
@ -49,6 +49,16 @@ std::map<std::string, int32_t> toStringMap(const py::dict dict) {
  return map;
 }

+// Convert a Python dict into a std::map<std::string, float>, turning each key with toString() and each value with
+// toFloat(). An empty dict yields an empty map.
+std::map<std::string, float> toStringFloatMap(const py::dict dict) {
+  std::map<std::string, float> converted;
+  // Iterating an empty dict is a no-op, so no separate emptiness test is needed.
+  for (const auto &entry : dict) {
+    (void)converted.emplace(toString(entry.first), toFloat(entry.second));
+  }
+  return converted;
+}
+
 std::vector<std::string> toStringVector(const py::list list) {
  std::vector<std::string> vector;
  if (!list.empty()) {
--- a/mindspore/ccsrc/minddata/dataset/api/python/pybind_conversion.h
+++ b/mindspore/ccsrc/minddata/dataset/api/python/pybind_conversion.h
@ -55,6 +55,8 @@ std::set<std::string> toStringSet(const py::list list);

 std::map<std::string, int32_t> toStringMap(const py::dict dict);

+// Convert a Python dict to a string-keyed map of floats (keys via toString, values via toFloat).
+std::map<std::string, float> toStringFloatMap(const py::dict dict);
+
 std::vector<std::string> toStringVector(const py::list list);

 std::vector<pid_t> toIntVector(const py::list input_list);
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
@ -25,6 +25,7 @@ add_library(audio-ir-kernels OBJECT
        gain_ir.cc
        griffin_lim_ir.cc
        highpass_biquad_ir.cc
+        inverse_mel_scale_ir.cc
        lfilter_ir.cc
        lowpass_biquad_ir.cc
        magphase_ir.cc
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.cc
@ -0,0 +1,88 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.h"
+
+#include "minddata/dataset/audio/ir/validators.h"
+#include "minddata/dataset/audio/kernels/inverse_mel_scale_op.h"
+
+namespace mindspore {
+namespace dataset {
+namespace audio {
+// InverseMelScale
+// Stores the transform parameters and resolves the SGD settings out of `sgdargs`.
+// Keys read from the map: "sgd_lr" (falls back to 0.1 when absent) and "sgd_momentum" (falls back to 0.9 when
+// absent). Any other key is kept in sgdargs_ but is not interpreted here.
+InverseMelScaleOperation::InverseMelScaleOperation(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min,
+                                                   float f_max, int32_t max_iter, float tolerance_loss,
+                                                   float tolerance_change, const std::map<std::string, float> &sgdargs,
+                                                   NormType norm, MelType mel_type)
+    : n_stft_(n_stft),
+      n_mels_(n_mels),
+      sample_rate_(sample_rate),
+      f_min_(f_min),
+      f_max_(f_max),
+      max_iter_(max_iter),
+      tolerance_loss_(tolerance_loss),
+      tolerance_change_(tolerance_change),
+      sgdargs_(sgdargs),
+      norm_(norm),
+      mel_type_(mel_type) {
+  constexpr float SGD_LR_DEFAULT = 0.1;
+  constexpr float SGD_MOMENTUM_DEFAULT = 0.9;
+
+  // One lookup per key: use the stored value when present, the default otherwise.
+  const auto lr_entry = sgdargs_.find("sgd_lr");
+  if (lr_entry != sgdargs_.end()) {
+    sgd_lr_ = lr_entry->second;
+  } else {
+    sgd_lr_ = SGD_LR_DEFAULT;
+  }
+
+  const auto momentum_entry = sgdargs_.find("sgd_momentum");
+  if (momentum_entry != sgdargs_.end()) {
+    sgd_momentum_ = momentum_entry->second;
+  } else {
+    sgd_momentum_ = SGD_MOMENTUM_DEFAULT;
+  }
+}
+
+// Nothing to release beyond what the members clean up themselves.
+InverseMelScaleOperation::~InverseMelScaleOperation() = default;
+
+// Returns the operation name constant kInverseMelScaleOperation.
+std::string InverseMelScaleOperation::Name() const { return kInverseMelScaleOperation; }
+
+// Check the stored parameters before an InverseMelScaleOp is built from them.
+// \return Status::OK() when every checked parameter is acceptable, otherwise an error Status describing the
+//     first offending value.
+Status InverseMelScaleOperation::ValidateParams() {
+  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("InverseMelScale", "n_mels", n_mels_));
+  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("InverseMelScale", "sample_rate", sample_rate_));
+  // n_stft becomes a dimension of the filter-bank matrix, so zero or negative values cannot be allowed through.
+  CHECK_FAIL_RETURN_UNEXPECTED(n_stft_ > 0,
+                               "InverseMelScale: n_stft must be greater than 0, but got: " + std::to_string(n_stft_));
+  CHECK_FAIL_RETURN_UNEXPECTED(n_stft_ != 1,
+                               "InverseMelScale: n_stft can not be equal to 1, but got: " + std::to_string(n_stft_));
+  RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("InverseMelScale", "f_max", f_max_));
+  if (f_max_ == 0) {
+    // f_max == 0 is the documented default meaning "use sample_rate / 2"; the kernel substitutes that value, so
+    // f_min has to be compared against the substituted value rather than against 0.
+    constexpr int32_t kHalf = 2;
+    const float effective_f_max = static_cast<float>(sample_rate_ / kHalf);
+    CHECK_FAIL_RETURN_UNEXPECTED(
+      f_min_ < effective_f_max,
+      "InverseMelScale: f_min must be less than sample_rate / 2 when f_max is 0, but got f_min: " +
+        std::to_string(f_min_) + ", sample_rate: " + std::to_string(sample_rate_));
+  } else {
+    CHECK_FAIL_RETURN_UNEXPECTED(f_min_ < f_max_, "InverseMelScale: f_max must be greater than f_min.");
+  }
+
+  // SGD params
+  RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("InverseMelScale", "sgd_lr", sgd_lr_));
+  RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("InverseMelScale", "sgd_momentum", sgd_momentum_));
+  return Status::OK();
+}
+
+// Create the runtime kernel. The resolved sgd_lr_ / sgd_momentum_ values are passed on, not the raw sgdargs_ map.
+std::shared_ptr<TensorOp> InverseMelScaleOperation::Build() {
+  return std::make_shared<InverseMelScaleOp>(n_stft_, n_mels_, sample_rate_, f_min_, f_max_, max_iter_,
+                                             tolerance_loss_, tolerance_change_, sgd_lr_, sgd_momentum_, norm_,
+                                             mel_type_);
+}
+
+// Serialise the constructor arguments into *out_json.
+// The optimizer options are written as the original "sgdargs" map; the resolved sgd_lr_ / sgd_momentum_ members
+// are not emitted separately.
+Status InverseMelScaleOperation::to_json(nlohmann::json *out_json) {
+  nlohmann::json serialized;
+
+  // filter-bank geometry
+  serialized["n_stft"] = n_stft_;
+  serialized["n_mels"] = n_mels_;
+  serialized["sample_rate"] = sample_rate_;
+  serialized["f_min"] = f_min_;
+  serialized["f_max"] = f_max_;
+
+  // optimisation settings
+  serialized["max_iter"] = max_iter_;
+  serialized["tolerance_loss"] = tolerance_loss_;
+  serialized["tolerance_change"] = tolerance_change_;
+  serialized["sgdargs"] = sgdargs_;
+
+  // mel filter-bank flavour
+  serialized["norm"] = norm_;
+  serialized["mel_type"] = mel_type_;
+
+  *out_json = serialized;
+  return Status::OK();
+}
+}  // namespace audio
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.h
@ -0,0 +1,67 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_INVERSE_MEL_SCALE_IR_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_INVERSE_MEL_SCALE_IR_H_
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "include/api/status.h"
+#include "minddata/dataset/include/dataset/constants.h"
+#include "minddata/dataset/kernels/ir/tensor_operation.h"
+
+namespace mindspore {
+namespace dataset {
+namespace audio {
+constexpr char kInverseMelScaleOperation[] = "InverseMelScale";
+
+/// \brief IR node describing an InverseMelScale transform: it holds the parameters, validates them and builds
+///     the runtime kernel.
+class InverseMelScaleOperation : public TensorOperation {
+ public:
+  /// \brief Constructor. Stores the arguments and resolves "sgd_lr" / "sgd_momentum" out of `sgdargs`.
+  InverseMelScaleOperation(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max,
+                           int32_t max_iter, float tolerance_loss, float tolerance_change,
+                           const std::map<std::string, float> &sgdargs, NormType norm, MelType mel_type);
+
+  /// \brief Destructor.
+  ~InverseMelScaleOperation();
+
+  /// \brief Create the runtime kernel from the stored parameters.
+  std::shared_ptr<TensorOp> Build() override;
+
+  /// \brief Check the stored parameters.
+  /// \return Status code.
+  Status ValidateParams() override;
+
+  /// \brief Name of this operation.
+  std::string Name() const override;
+
+  /// \brief Serialise the stored parameters.
+  /// \param[out] out_json Receives the serialised arguments.
+  /// \return Status code.
+  Status to_json(nlohmann::json *out_json) override;
+
+ private:
+  int32_t n_stft_;
+  int32_t n_mels_;
+  int32_t sample_rate_;
+  float f_min_;
+  float f_max_;
+  int32_t max_iter_;
+  float tolerance_loss_;
+  float tolerance_change_;
+  std::map<std::string, float> sgdargs_;  // optimizer options exactly as supplied by the caller
+  float sgd_lr_;                          // value of sgdargs_["sgd_lr"], or the default when the key is missing
+  float sgd_momentum_;                    // value of sgdargs_["sgd_momentum"], or the default when missing
+  NormType norm_;
+  MelType mel_type_;
+};
+}  // namespace audio
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_INVERSE_MEL_SCALE_IR_H_
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
@ -26,6 +26,7 @@ add_library(audio-kernels OBJECT
        gain_op.cc
        griffin_lim_op.cc
        highpass_biquad_op.cc
+        inverse_mel_scale_op.cc
        lfilter_op.cc
        lowpass_biquad_op.cc
        magphase_op.cc
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc
@ -2050,5 +2050,106 @@ Status GriffinLim(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor>
                                  momentum, length, rand_init, rnd);
  }
 }
+
+/// \brief Estimate a linear-frequency spectrogram from a mel spectrogram. For every leading "channel" a randomly
+///     initialised estimate is refined with SGD steps on the squared error between the input and the estimate
+///     projected through the mel filter bank, then clamped to be non-negative.
+/// \param[in] input Tensor of shape <..., n_mels, time> holding elements of type T. It is not modified.
+/// \param[out] output Tensor of shape <..., n_stft, time>; a 2-D input produces <1, n_stft, time>.
+/// \param[in] n_stft Number of STFT bins (second-to-last dimension of the output).
+/// \param[in] n_mels Number of mel filters; must equal the second-to-last dimension of input.
+/// \param[in] sample_rate Sample rate of the signal.
+/// \param[in] f_min Minimum frequency.
+/// \param[in] f_max Maximum frequency; 0 is replaced by floor(sample_rate / 2).
+/// \param[in] max_iter Upper bound on the number of optimisation iterations per channel.
+/// \param[in] tolerance_loss Stop once the loss drops below this value.
+/// \param[in] tolerance_change Stop once the loss changes by less than this value between iterations.
+/// \param[in] sgd_lr Learning rate forwarded to SGD.
+/// \param[in] sgd_momentum Momentum factor forwarded to SGD.
+/// \param[in] norm Filter-bank normalisation, forwarded to CreateFbanks.
+/// \param[in] mel_type Mel scale variant, forwarded to CreateFbanks.
+/// \param[in] rnd Random generator used for the initial estimate (taken by value, the caller's state is untouched).
+/// \return Status code.
+template <typename T>
+Status InverseMelScaleImpl(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t n_stft,
+                           int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
+                           float tolerance_loss, float tolerance_change, float sgd_lr, float sgd_momentum,
+                           NormType norm, MelType mel_type, std::mt19937 rnd) {
+  using MatrixT = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>;
+
+  f_max = f_max == 0 ? static_cast<T>(std::floor(sample_rate / 2)) : f_max;
+  // create fb mat <freq, n_mels>
+  std::shared_ptr<Tensor> freq_bin_mat;
+  RETURN_IF_NOT_OK(CreateFbanks(&freq_bin_mat, n_stft, f_min, f_max, n_mels, sample_rate, norm, mel_type));
+
+  auto fb_ptr = &*freq_bin_mat->begin<float>();
+  Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>> matrix_fb(fb_ptr, n_mels, n_stft);
+  // The filter bank is constant, so convert it to T once instead of once per optimisation step.
+  const MatrixT fb = matrix_fb.template cast<T>();
+  const MatrixT fb_t = matrix_fb.transpose().template cast<T>();
+
+  // Check the trailing <n_mels, time> dimensions before they are used as divisors or matrix extents.
+  TensorShape input_shape = input->shape();
+  CHECK_FAIL_RETURN_UNEXPECTED(n_mels == input_shape[-1 * TWO],
+                               "InverseMelScale: n_mels must be equal to the penultimate dimension of input.");
+  CHECK_FAIL_RETURN_UNEXPECTED(input_shape[-1] > 0 && input_shape[-1 * TWO] > 0,
+                               "InverseMelScale: the last two dimensions of input must be greater than 0.");
+  // Number of <n_mels, time> slices. The data is addressed through its raw pointer below, so the caller's
+  // tensor does not have to be reshaped (and therefore is not modified).
+  const int64_t num_channels = input->Size() / input_shape[-1] / input_shape[-1 * TWO];
+
+  int time = static_cast<int>(input_shape[-1]);
+  int freq = static_cast<int>(matrix_fb.cols());
+  // flattened result for all channels, each stored as <freq, time>
+  std::vector<T> specgram;
+  specgram.reserve(static_cast<size_t>(num_channels) * static_cast<size_t>(freq) * static_cast<size_t>(time));
+  // engine for random matrix
+  std::uniform_real_distribution<T> dist(0, 1);
+  auto input_ptr = &*input->begin<T>();
+  for (int64_t channel = 0; channel < num_channels; channel++) {
+    // View of this channel. A column-major (time x n_mels) map over the row-major <n_mels, time> data.
+    Eigen::Map<MatrixT> input_channel(input_ptr + static_cast<int64_t>(time) * n_mels * channel, time, n_mels);
+    // init specgram at n=channel
+    MatrixT mat_channel =
+      Eigen::MatrixXd::Zero(time, freq).unaryExpr([&rnd, &dist](double dummy) { return dist(rnd); });
+    std::vector<T> vec_channel(mat_channel.data(), mat_channel.data() + mat_channel.size());
+    std::shared_ptr<Tensor> param_channel;
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(vec_channel, TensorShape({freq * time}), &param_channel));
+    // sgd
+    // NOTE(review): SGD keeps its momentum accumulator in a local variable, so no momentum state is carried from
+    // one iteration of this loop to the next.
+    T loss = std::numeric_limits<T>::max();
+    for (int epoch = 0; epoch < max_iter; epoch++) {
+      // Materialise each intermediate: keeping Eigen expressions in `auto` would re-evaluate the product for
+      // every use of `diff`.
+      const MatrixT pred = mat_channel * fb_t;
+      // cal loss with pred and gt
+      const MatrixT diff = input_channel - pred;
+      const T new_loss = diff.array().square().mean();
+      // Converged: stop before computing a step whose result would be thrown away.
+      const T diff_loss = std::abs(loss - new_loss);
+      if ((new_loss < tolerance_loss) || (diff_loss < tolerance_change)) {
+        break;
+      }
+      loss = new_loss;
+      // cal grad
+      const MatrixT mat_grad = diff * fb * (-1) / time;
+      std::vector<T> vec_grad(mat_grad.data(), mat_grad.data() + mat_grad.size());
+      std::shared_ptr<Tensor> tensor_grad;
+      RETURN_IF_NOT_OK(Tensor::CreateFromVector(vec_grad, TensorShape({mat_grad.size()}), &tensor_grad));
+
+      std::shared_ptr<Tensor> nspec;
+      RETURN_IF_NOT_OK(SGD<T>(param_channel, &nspec, tensor_grad, sgd_lr, sgd_momentum));
+
+      auto nspec_ptr = &*nspec->begin<T>();
+      mat_channel = Eigen::Map<MatrixT>(nspec_ptr, time, freq);
+      // use new mat_channel to update param_channel
+      RETURN_IF_NOT_OK(Tensor::CreateFromTensor(nspec, &param_channel));
+    }
+    // clamp negative values to 0; the column-major (time x freq) storage already reads as row-major <freq, time>
+    const MatrixT mat_res = mat_channel.cwiseMax(0);
+    specgram.insert(specgram.end(), mat_res.data(), mat_res.data() + mat_res.size());
+  }
+  std::shared_ptr<Tensor> final_out;
+  if (input_shape.Size() > TWO) {
+    // keep the leading dimensions, replace <n_mels, time> by <freq, time>
+    std::vector<int64_t> out_shape_vec = input_shape.AsVector();
+    out_shape_vec[input_shape.Size() - 1] = time;
+    out_shape_vec[input_shape.Size() - TWO] = freq;
+    TensorShape output_shape(out_shape_vec);
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(specgram, output_shape, &final_out));
+  } else {
+    TensorShape output_shape = TensorShape({num_channels, static_cast<int64_t>(freq), static_cast<int64_t>(time)});
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(specgram, output_shape, &final_out));
+  }
+  *output = final_out;
+  return Status::OK();
+}
+
+// Entry point: picks the element type of the computation. float64 input is processed as double, every other
+// input type is first cast to float32 and processed as float.
+Status InverseMelScale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t n_stft,
+                       int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
+                       float tolerance_loss, float tolerance_change, float sgd_lr, float sgd_momentum, NormType norm,
+                       MelType mel_type, std::mt19937 rnd) {
+  if (input->type() != DataType::DE_FLOAT64) {
+    std::shared_ptr<Tensor> float_input;
+    RETURN_IF_NOT_OK(TypeCast(input, &float_input, DataType(DataType::DE_FLOAT32)));
+    return InverseMelScaleImpl<float>(float_input, output, n_stft, n_mels, sample_rate, f_min, f_max, max_iter,
+                                      tolerance_loss, tolerance_change, sgd_lr, sgd_momentum, norm, mel_type, rnd);
+  }
+  // Already float64: hand the tensor over as it is.
+  return InverseMelScaleImpl<double>(input, output, n_stft, n_mels, sample_rate, f_min, f_max, max_iter,
+                                     tolerance_loss, tolerance_change, sgd_lr, sgd_momentum, norm, mel_type, rnd);
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
@ -501,6 +501,80 @@ Status Dct(std::shared_ptr<Tensor> *output, int32_t n_mfcc, int32_t n_mels, Norm
 /// \return Status code.
 Status ComplexNorm(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float power);

+/// \brief Apply one stochastic-gradient-descent update to a flat parameter tensor.
+/// \note The momentum accumulator is local to this call and starts at zero, so no momentum state is carried
+///     over between calls.
+/// \param[in] input Current parameters, read as elements of type T.
+/// \param[out] output Updated parameters as a 1-D tensor with the same number of elements as input.
+/// \param[in] grad Gradient for every parameter; must have the same number of elements as input.
+/// \param[in] lr Learning rate.
+/// \param[in] momentum Momentum factor; the momentum branch is only taken when it is greater than 0.
+/// \param[in] dampening Dampening for momentum.
+/// \param[in] weight_decay Weight decay; applied only when greater than 0.
+/// \param[in] nesterov Whether enable nesterov momentum.
+/// \param[in] stat First-step flag: when greater than 0 the accumulator is set to the gradient itself
+///     (dampening is skipped) for every element.
+/// \return Status code.
+template <typename T>
+Status SGD(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::shared_ptr<Tensor> &grad,
+           float lr, float momentum = 0.0, float dampening = 0.0, float weight_decay = 0.0, bool nesterov = false,
+           float stat = 0.0) {
+  CHECK_FAIL_RETURN_UNEXPECTED(input != nullptr && grad != nullptr && output != nullptr,
+                               "SGD: input, grad and output must not be null.");
+  // A shorter gradient would otherwise leave the tail of the result silently at zero.
+  CHECK_FAIL_RETURN_UNEXPECTED(input->Size() == grad->Size(),
+                               "SGD: input and grad must have the same number of elements, but got input: " +
+                                 std::to_string(input->Size()) + ", grad: " + std::to_string(grad->Size()));
+  const size_t elem_num = static_cast<size_t>(input->Size());
+  // Decide once whether this is a "first step": the flag has to hold for every element, not only for the first.
+  const bool first_step = stat > 0;
+  std::vector<T> accum(elem_num);
+  std::shared_ptr<Tensor> output_param;
+  std::vector<T> out_param(elem_num);
+  auto itr_inp = input->begin<T>();
+  auto itr_grad = grad->begin<T>();
+  for (size_t ind = 0; ind < elem_num; ind++) {
+    T grad_new = (*itr_grad);
+    if (weight_decay > static_cast<float>(0.0)) {
+      grad_new += (*itr_inp) * static_cast<T>(weight_decay);
+    }
+    if (momentum > 0) {
+      if (first_step) {
+        accum[ind] = grad_new;
+      } else {
+        accum[ind] = accum[ind] * momentum + (1 - static_cast<T>(dampening)) * grad_new;
+      }
+      if (nesterov) {
+        grad_new += accum[ind] * momentum;
+      } else {
+        grad_new = accum[ind];
+      }
+    }
+    out_param[ind] = (*itr_inp) - lr * grad_new;
+    itr_inp++;
+    itr_grad++;
+  }
+
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(out_param, TensorShape({input->Size()}), &output_param));
+  *output = output_param;
+  return Status::OK();
+}
+
+/// \brief Use conversion matrix to solve normal STFT from mel frequency STFT.
+/// \param[in] input Tensor of shape <..., n_mels, time>. Inputs that are not float64 are cast to float32 first.
+/// \param[out] output Tensor of shape <..., freq, time>, where freq equals n_stft; a 2-D input yields
+///     <1, freq, time>.
+/// \param[in] n_stft Number of bins in STFT, the value must be greater than 0.
+/// \param[in] n_mels Number of mel filter; it must equal the penultimate dimension of input.
+/// \param[in] sample_rate Sample rate of the signal, the value can't be zero.
+/// \param[in] f_min Minimum frequency, the value must be greater than or equal to 0.
+/// \param[in] f_max Maximum frequency. A value of 0 is replaced by floor(sample_rate / 2).
+/// \param[in] max_iter Maximum number of optimization iterations, the value must be greater than 0.
+/// \param[in] tolerance_loss Value of loss to stop optimization at, the value must be greater than or equal to 0.
+/// \param[in] tolerance_change Difference in losses to stop optimization at, the value must be greater than or
+///     equal to 0.
+/// \param[in] sgd_lr Learning rate for SGD optimizer, the value must be greater than or equal to 0.
+/// \param[in] sgd_momentum Momentum factor for SGD optimizer, the value must be greater than or equal to 0.
+/// \param[in] norm Type of norm, value should be NormType::kSlaney or NormType::kNone. If norm is NormType::kSlaney,
+///     divide the triangle mel weight by the width of the mel band.
+/// \param[in] mel_type Type of mel, value should be MelType::kHtk or MelType::kSlaney.
+/// \param[in] rnd Random generator used to initialise the estimate. It is taken by value, so the caller's
+///     generator is not advanced.
+/// \return Status code.
+Status InverseMelScale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t n_stft,
+                       int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
+                       float tolerance_loss, float tolerance_change, float sgd_lr, float sgd_momentum, NormType norm,
+                       MelType mel_type, std::mt19937 rnd);
+
 /// \brief Decode mu-law encoded signal.
 /// \param input Tensor of shape <..., time>.
 /// \param output Tensor of shape <..., time>.
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/inverse_mel_scale_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/inverse_mel_scale_op.cc
@ -0,0 +1,59 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "minddata/dataset/audio/kernels/inverse_mel_scale_op.h"
+
+#include "minddata/dataset/audio/kernels/audio_utils.h"
+#include "minddata/dataset/kernels/data/data_utils.h"
+#include "minddata/dataset/util/status.h"
+
+namespace mindspore {
+namespace dataset {
+// Run the transform on one tensor: validate the input, then delegate to InverseMelScale() with the stored
+// parameters and a copy of this op's random generator.
+Status InverseMelScaleOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
+  // check and init
+  IO_CHECK(input, output);
+  // check input rank against kDefaultAudioDim (presumably the minimum rank of 2 - confirm against its definition)
+  RETURN_IF_NOT_OK(ValidateLowRank("InverseMelScale", input, kDefaultAudioDim, "<..., freq, time>"));
+  // check input type, it should be [int, float, double]
+  RETURN_IF_NOT_OK(ValidateTensorNumeric("InverseMelScale", input));
+
+  return InverseMelScale(input, output, n_stft_, n_mels_, sample_rate_, f_min_, f_max_, max_iter_, tolerance_loss_,
+                         tolerance_change_, sgd_lr_, sgd_momentum_, norm_, mel_type_, rnd_);
+}
+
+// Report the shape Compute() produces for a given input shape.
+// An input of <..., n_mels, time> maps to <..., n_stft, time>; a 2-D input <n_mels, time> maps to
+// <1, n_stft, time>, matching the tensor built by the kernel.
+// \param[in] inputs Input shapes; only inputs[0] is used.
+// \param[out] outputs Receives exactly one shape on success.
+// \return Status::OK() on success, kMDUnexpectedError when the input has fewer than two dimensions.
+Status InverseMelScaleOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
+  RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
+  outputs.clear();
+  constexpr size_t kMinRank = 2;
+  if (inputs.empty()) {
+    return Status(StatusCode::kMDUnexpectedError, "InverseMelScale: invalid input shape.");
+  }
+  auto out_dims = inputs[0].AsVector();
+  // The mel and time axes are the last two dimensions, so anything of lower rank cannot be handled.
+  if (out_dims.size() < kMinRank) {
+    return Status(StatusCode::kMDUnexpectedError, "InverseMelScale: invalid input shape.");
+  }
+  // Only the mel axis changes size; the time axis and all leading dimensions are kept.
+  out_dims[out_dims.size() - kMinRank] = n_stft_;
+  if (out_dims.size() == kMinRank) {
+    // The kernel adds a leading dimension of size 1 for 2-D input.
+    (void)out_dims.insert(out_dims.begin(), 1);
+  }
+  outputs.emplace_back(TensorShape(out_dims));
+  return Status::OK();
+}
+
+// Report the element type Compute() produces: float64 stays float64, every other numeric type becomes float32.
+Status InverseMelScaleOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) {
+  RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs));
+  RETURN_IF_NOT_OK(
+    ValidateTensorType("InverseMelScale", inputs[0].IsNumeric(), "[int, float, double]", inputs[0].ToString()));
+  const bool is_double = (inputs[0] == DataType(DataType::DE_FLOAT64));
+  outputs[0] = is_double ? DataType(DataType::DE_FLOAT64) : DataType(DataType::DE_FLOAT32);
+  return Status::OK();
+}
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/inverse_mel_scale_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/inverse_mel_scale_op.h
@ -0,0 +1,79 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_INVERSE_MEL_SCALE_OP_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_INVERSE_MEL_SCALE_OP_H_
+
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "include/dataset/constants.h"
+#include "minddata/dataset/core/tensor.h"
+#include "minddata/dataset/kernels/tensor_op.h"
+#include "minddata/dataset/util/random.h"
+
+namespace mindspore {
+namespace dataset {
+/// \brief Runtime kernel that converts a mel spectrogram back to a linear-frequency spectrogram.
+class InverseMelScaleOp : public TensorOp {
+ public:
+  /// \brief Constructor. Stores the parameters and seeds the random generator from GetSeed().
+  InverseMelScaleOp(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
+                    float tolerance_loss, float tolerance_change, float sgd_lr, float sgd_momentum, NormType norm,
+                    MelType mel_type)
+      : n_stft_(n_stft),
+        n_mels_(n_mels),
+        sample_rate_(sample_rate),
+        f_min_(f_min),
+        f_max_(f_max),
+        max_iter_(max_iter),
+        tolerance_loss_(tolerance_loss),
+        tolerance_change_(tolerance_change),
+        sgd_lr_(sgd_lr),
+        sgd_momentum_(sgd_momentum),
+        norm_(norm),
+        mel_type_(mel_type) {
+    rnd_.seed(GetSeed());
+  }
+
+  /// \brief Destructor.
+  ~InverseMelScaleOp() override = default;
+
+  /// \brief Validate the input and run the inverse mel-scale computation.
+  /// \param[in] input Input tensor.
+  /// \param[out] output Result tensor.
+  /// \return Status code.
+  Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
+
+  /// \brief Name of this kernel (kInverseMelScaleOp).
+  std::string Name() const override { return kInverseMelScaleOp; }
+
+  /// \brief Compute the output shape for the given input shape.
+  Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
+
+  /// \brief Compute the output type for the given input type.
+  Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
+
+ private:
+  int32_t n_stft_;
+  int32_t n_mels_;
+  int32_t sample_rate_;
+  float f_min_;
+  float f_max_;
+  int32_t max_iter_;
+  float tolerance_loss_;
+  float tolerance_change_;
+  float sgd_lr_;
+  float sgd_momentum_;
+  NormType norm_;
+  MelType mel_type_;
+  std::mt19937 rnd_;  // seeded once in the constructor; Compute() passes it on by value
+};
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_INVERSE_MEL_SCALE_OP_H_
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
@ -18,6 +18,7 @@
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_

 #include <limits>
+#include <map>
 #include <memory>
 #include <string>
 #include <utility>
@ -585,6 +586,43 @@ class MS_API HighpassBiquad final : public TensorTransform {
  std::shared_ptr<Data> data_;
 };

+/// \brief InverseMelScale TensorTransform
+/// \note Solve for a normal STFT from a mel frequency STFT, using a conversion matrix.
+class MS_API InverseMelScale final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] n_stft Number of bins in STFT, must be positive.
+  /// \param[in] n_mels Number of mel filter, must be positive (Default: 128).
+  /// \param[in] sample_rate Sample rate of the signal, the value can't be zero (Default: 16000).
+  /// \param[in] f_min Minimum frequency, must be non-negative (Default: 0.0).
+  /// \param[in] f_max Maximum frequency, must be non-negative (Default: 0.0, will be set to sample_rate / 2).
+  /// \param[in] max_iter Maximum number of optimization iterations, must be positive (Default: 100000).
+  /// \param[in] tolerance_loss Value of loss to stop optimization at, must be non-negative (Default: 1e-5).
+  /// \param[in] tolerance_change Difference in losses to stop optimization at, must be non-negative (Default: 1e-8).
+  /// \param[in] sgdargs Parameters of SGD optimizer. The recognised keys are "sgd_lr" (learning rate) and
+  ///     "sgd_momentum" (momentum factor) (Default: {{"sgd_lr", 0.1}, {"sgd_momentum", 0.0}}). When a map
+  ///     without one of these keys is passed, the missing entry falls back to 0.1 for "sgd_lr" and 0.9 for
+  ///     "sgd_momentum".
+  /// \param[in] norm Type of norm, value should be NormType::kSlaney or NormType::kNone. If norm is NormType::kSlaney,
+  ///     divide the triangle mel weight by the width of the mel band (Default: NormType::kNone).
+  /// \param[in] mel_type Type of mel, value should be MelType::kHtk or MelType::kSlaney (Default: MelType::kHtk).
+  explicit InverseMelScale(int32_t n_stft, int32_t n_mels = 128, int32_t sample_rate = 16000, float f_min = 0.0,
+                           float f_max = 0.0, int32_t max_iter = 100000, float tolerance_loss = 1e-5,
+                           float tolerance_change = 1e-8,
+                           const std::map<std::string, float> &sgdargs = {{"sgd_lr", 0.1}, {"sgd_momentum", 0.0}},
+                           NormType norm = NormType::kNone, MelType mel_type = MelType::kHtk);
+
+  /// \brief Destructor.
+  ~InverseMelScale() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;                  // holds the constructor arguments; defined in the implementation file
+  std::shared_ptr<Data> data_;  // parameters handed to Parse()
+};
+
 /// \brief Design filter. Similar to SoX implementation.
 class MS_API LFilter final : public TensorTransform {
 public:
--- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
@ -170,6 +170,7 @@ constexpr char kFrequencyMaskingOp[] = "FrequencyMaskingOp";
 constexpr char kGainOp[] = "GainOp";
 constexpr char kGriffinLimOp[] = "GriffinLimOp";
 constexpr char kHighpassBiquadOp[] = "HighpassBiquadOp";
+constexpr char kInverseMelScaleOp[] = "InverseMelScaleOp";
 constexpr char kLFilterOp[] = "LFilterOp";
 constexpr char kLowpassBiquadOp[] = "LowpassBiquadOp";
 constexpr char kMagphaseOp[] = "MagphaseOp";
--- a/mindspore/python/mindspore/dataset/audio/transforms.py
+++ b/mindspore/python/mindspore/dataset/audio/transforms.py
@ -29,10 +29,10 @@ from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_
    check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \
    check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \
    check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_griffin_lim, \
-    check_highpass_biquad, check_lfilter, check_lowpass_biquad, check_magphase, check_mask_along_axis, \
-    check_mask_along_axis_iid, check_masking, check_mel_scale, check_mu_law_coding, check_overdrive, \
-    check_phase_vocoder, check_phaser, check_riaa_biquad, check_sliding_window_cmn, check_spectral_centroid, \
-    check_spectrogram, check_time_stretch, check_treble_biquad, check_vol
+    check_highpass_biquad, check_inverse_mel_scale, check_lfilter, check_lowpass_biquad, check_magphase, \
+    check_mask_along_axis, check_mask_along_axis_iid, check_masking, check_mel_scale, check_mu_law_coding, \
+    check_overdrive, check_phase_vocoder, check_phaser, check_riaa_biquad, check_sliding_window_cmn, \
+    check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, check_vol


 class AudioTensorOperation(TensorOperation):
@ -1010,6 +1010,58 @@ class HighpassBiquad(AudioTensorOperation):
        return cde.HighpassBiquadOperation(self.sample_rate, self.cutoff_freq, self.Q)


+class InverseMelScale(AudioTensorOperation):
+    """
+    Solve for a normal STFT from a mel frequency STFT, using a conversion matrix.
+
+    Args:
+        n_stft (int): Number of bins in STFT, must be positive.
+        n_mels (int, optional): Number of mel filterbanks, must be positive (default=128).
+        sample_rate (int, optional): Sample rate of audio signal, must be positive (default=16000).
+        f_min (float, optional): Minimum frequency, must be less than f_max (default=0.0).
+        f_max (float, optional): Maximum frequency, must be positive (default=None, will be set to
+            sample_rate // 2).
+        max_iter (int, optional): Maximum number of optimization iterations, must be positive (default=100000).
+        tolerance_loss (float, optional): Value of loss to stop optimization at, must be positive (default=1e-5).
+        tolerance_change (float, optional): Difference in losses to stop optimization at, must be positive
+            (default=1e-8).
+        sgdargs (dict, optional): Arguments for the SGD optimizer. When given, it is read through the keys
+            'sgd_lr' and 'sgd_momentum' (default=None, will be set to {'sgd_lr': 0.1, 'sgd_momentum': 0.9}).
+        norm (NormType, optional): Normalization method, can be NormType.SLANEY or NormType.NONE
+            (default=NormType.NONE).
+        mel_type (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK (default=MelType.HTK).
+
+    Examples:
+        >>> import numpy as np
+        >>>
+        >>> waveform = np.random.randn(2, 2, 3, 2)
+        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
+        >>> transforms = [audio.InverseMelScale(20, 3, 16000, 0, 8000, 10)]
+        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
+    """
+
+    @check_inverse_mel_scale
+    def __init__(self, n_stft, n_mels=128, sample_rate=16000, f_min=0.0, f_max=None, max_iter=100000,
+                 tolerance_loss=1e-5, tolerance_change=1e-8, sgdargs=None, norm=NormType.NONE, mel_type=MelType.HTK):
+        # Resolve the "None means default" arguments before storing anything.
+        if f_max is None:
+            f_max = sample_rate // 2
+        if sgdargs is None:
+            # NOTE(review): the C++ API declaration in this change defaults sgd_momentum to 0.0, while this
+            # default uses 0.9 - confirm which value is intended.
+            sgdargs = {'sgd_lr': 0.1, 'sgd_momentum': 0.9}
+
+        # Mel filterbank description.
+        self.n_stft = n_stft
+        self.n_mels = n_mels
+        self.sample_rate = sample_rate
+        self.f_min = f_min
+        self.f_max = f_max
+        self.norm = norm
+        self.mel_type = mel_type
+
+        # Optimizer settings.
+        self.max_iter = max_iter
+        self.tolerance_loss = tolerance_loss
+        self.tolerance_change = tolerance_change
+        self.sgdargs = sgdargs
+
+    def parse(self):
+        # Translate the Python enums into their C++ counterparts before building the operation.
+        norm_type = DE_C_NORM_TYPE[self.norm]
+        mel_scale_type = DE_C_MEL_TYPE[self.mel_type]
+        return cde.InverseMelScaleOperation(self.n_stft, self.n_mels, self.sample_rate, self.f_min, self.f_max,
+                                            self.max_iter, self.tolerance_loss, self.tolerance_change, self.sgdargs,
+                                            norm_type, mel_scale_type)
+
+
 class LFilter(AudioTensorOperation):
    """
    Design two-pole filter for audio waveform of dimension of (..., time).
--- a/mindspore/python/mindspore/dataset/audio/validators.py
+++ b/mindspore/python/mindspore/dataset/audio/validators.py
@ -286,6 +286,87 @@ def check_gain(method):
    return new_method


+def check_mel_scale_n_mels(n_mels):
+    """
+    Validate the `n_mels` argument of the mel scale transforms.
+
+    The value is checked with `type_check` against int and then with `check_pos_int32`.
+    """
+    type_check(n_mels, (int,), "n_mels")
+    check_pos_int32(n_mels, "n_mels")
+
+
+def check_mel_scale_sample_rate(sample_rate):
+    """
+    Validate the `sample_rate` argument of the mel scale transforms.
+
+    The value is checked with `type_check` against int and then with `check_pos_int32`.
+    """
+    type_check(sample_rate, (int,), "sample_rate")
+    check_pos_int32(sample_rate, "sample_rate")
+
+
+def check_mel_scale_freq(f_min, f_max, sample_rate):
+    """
+    Validate the frequency range arguments `f_min` and `f_max` of the mel scale transforms.
+
+    Args:
+        f_min (Union[int, float]): Minimum frequency.
+        f_max (Union[int, float, None]): Maximum frequency, or None when the caller did not provide one.
+        sample_rate (int): Sample rate, used as the upper reference when `f_max` is None.
+
+    Raises:
+        ValueError: If `f_min` is not smaller than `f_max`, or not smaller than `sample_rate // 2` when
+            `f_max` is None.
+    """
+    type_check(f_min, (int, float), "f_min")
+    check_float32(f_min, "f_min")
+
+    if f_max is None:
+        # No explicit upper bound: compare f_min against sample_rate // 2 instead.
+        if f_min >= sample_rate // 2:
+            raise ValueError("MelScale: sample_rate // 2 should be greater than f_min when f_max is set to None.")
+        return
+
+    type_check(f_max, (int, float), "f_max")
+    check_pos_float32(f_max, "f_max")
+    if f_min >= f_max:
+        raise ValueError("MelScale: f_max should be greater than f_min.")
+
+
+def check_mel_scale_n_stft(n_stft):
+    """
+    Validate the `n_stft` argument of the mel scale transforms.
+
+    The value is checked with `type_check` against int and then with `check_pos_int32`.
+    """
+    type_check(n_stft, (int,), "n_stft")
+    check_pos_int32(n_stft, "n_stft")
+
+
+def check_mel_scale_norm(norm):
+    """Validate that the `norm` argument of the mel scale transforms is a NormType."""
+    type_check(norm, (NormType,), "norm")
+
+
+def check_mel_scale_mel_type(mel_type):
+    """Validate that the `mel_type` argument of the mel scale transforms is a MelType."""
+    type_check(mel_type, (MelType,), "mel_type")
+
+
+def check_inverse_mel_scale(method):
+    """
+    Wrapper method to check the parameters of InverseMelScale.
+
+    The returned function validates every argument and only then calls the wrapped `method` with the
+    original, unmodified arguments.
+
+    Raises:
+        KeyError: If `sgdargs` is given but lacks the key 'sgd_lr' or 'sgd_momentum'.
+        ValueError: Raised directly for an invalid `f_min`/`f_max` combination (see `check_mel_scale_freq`).
+
+    Further errors may be raised by the individual type and range helpers called below.
+    """
+
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [n_stft, n_mels, sample_rate, f_min, f_max, max_iter, tolerance_loss, tolerance_change, sgdargs, norm,
+         mel_type], _ = parse_user_args(method, *args, **kwargs)
+        # Arguments shared with MelScale.
+        check_mel_scale_n_mels(n_mels)
+        check_mel_scale_sample_rate(sample_rate)
+        check_mel_scale_freq(f_min, f_max, sample_rate)
+        check_mel_scale_n_stft(n_stft)
+        check_mel_scale_norm(norm)
+        check_mel_scale_mel_type(mel_type)
+
+        # Arguments controlling the optimization loop.
+        type_check(max_iter, (int,), "max_iter")
+        check_pos_int32(max_iter, "max_iter")
+
+        type_check(tolerance_loss, (int, float), "tolerance_loss")
+        check_pos_float32(tolerance_loss, "tolerance_loss")
+
+        type_check(tolerance_change, (int, float), "tolerance_change")
+        check_pos_float32(tolerance_change, "tolerance_change")
+
+        if sgdargs is not None:
+            # Reject non-dict input up front instead of failing with an obscure indexing error below.
+            type_check(sgdargs, (dict,), "sgdargs")
+            # Both optimizer settings are mandatory once sgdargs is given; report a missing key explicitly
+            # (still as KeyError, which is what the plain dictionary lookup raised before).
+            for required_key in ("sgd_lr", "sgd_momentum"):
+                if required_key not in sgdargs:
+                    raise KeyError("InverseMelScale: sgdargs should contain the key '{}'.".format(required_key))
+
+            sgd_lr = sgdargs["sgd_lr"]
+            sgd_momentum = sgdargs["sgd_momentum"]
+
+            type_check(sgd_lr, (int, float), "sgd_lr")
+            check_non_negative_float32(sgd_lr, "sgd_lr")
+
+            type_check(sgd_momentum, (int, float), "sgd_momentum")
+            check_non_negative_float32(sgd_momentum, "sgd_momentum")
+
+        return method(self, *args, **kwargs)
+
+    return new_method
+
+
 def check_lfilter(method):
    """Wrapper method to check the parameters of LFilter."""

@ -519,31 +600,12 @@ def check_mel_scale(method):
    @wraps(method)
    def new_method(self, *args, **kwargs):
        [n_mels, sample_rate, f_min, f_max, n_stft, norm, mel_type], _ = parse_user_args(method, *args, **kwargs)
-
-        type_check(n_mels, (int,), "n_mels")
-        check_pos_int32(n_mels, "n_mels")
-
-        type_check(sample_rate, (int,), "sample_rate")
-        check_pos_int32(sample_rate, "sample_rate")
-
-        type_check(f_min, (int, float), "f_min")
-        check_float32(f_min, "f_min")
-
-        if f_max is not None:
-            type_check(f_max, (int, float), "f_max")
-            check_pos_float32(f_max, "f_max")
-            if f_min >= f_max:
-                raise ValueError("MelScale: f_max should be greater than f_min.")
-        else:
-            if f_min >= sample_rate // 2:
-                raise ValueError("MelScale: sample_rate // 2 should be greater than f_min when f_max is set to None.")
-
-        type_check(n_stft, (int,), "n_stft")
-        check_pos_int32(n_stft, "n_stft")
-
-        type_check(norm, (NormType,), "norm")
-
-        type_check(mel_type, (MelType,), "mel_type")
+        check_mel_scale_n_mels(n_mels)
+        check_mel_scale_sample_rate(sample_rate)
+        check_mel_scale_freq(f_min, f_max, sample_rate)
+        check_mel_scale_n_stft(n_stft)
+        check_mel_scale_norm(norm)
+        check_mel_scale_mel_type(mel_type)

        return method(self, *args, **kwargs)

--- a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc
+++ b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc
@ -936,6 +936,164 @@ TEST_F(MindDataTestPipeline, TestHighpassBiquadWrongArgs) {
  EXPECT_EQ(iter02, nullptr);
 }

+/// Feature: InverseMelScale
+/// Description: test basic usage of InverseMelScale
+/// Expectation: get correct number of data
+TEST_F(MindDataTestPipeline, TestInverseMelScalePipeline) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestInverseMelScalePipeline.";
+  // Case 1: float32 input of shape {4, 3, 7}, n_mels = 3, n_stft = 20.
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {4, 3, 7}));
+  std::shared_ptr<Dataset> ds = RandomData(10, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto inverse_mel_scale_op1 = audio::InverseMelScale(20, 3, 16000, 0, 8000, 10);
+  ds = ds->Map({inverse_mel_scale_op1});
+  EXPECT_NE(ds, nullptr);
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  // Check the iterator itself (not the dataset again): it is dereferenced right below.
+  ASSERT_NE(iter, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+  std::vector<int64_t> expected = {4, 20, 7};
+  int i = 0;
+  while (row.size() != 0) {
+    auto col = row["waveform"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 3);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 10);
+  iter->Stop();
+
+  // Case 2: float64 input of shape {10, 20, 30}, n_mels = 20, n_stft = 128; the output stays float64.
+  std::shared_ptr<SchemaObj> schema2 = Schema();
+  ASSERT_OK(schema2->add_column("waveform", mindspore::DataType::kNumberTypeFloat64, {10, 20, 30}));
+  ds = RandomData(10, schema2);
+  EXPECT_NE(ds, nullptr);
+  auto inverse_mel_scale_op2 = audio::InverseMelScale(128, 20, 16000, 0, 8000, 100);
+  ds = ds->Map({inverse_mel_scale_op2});
+  EXPECT_NE(ds, nullptr);
+  iter = ds->CreateIterator();
+  ASSERT_NE(iter, nullptr);
+  ASSERT_OK(iter->GetNextRow(&row));
+  expected = {10, 128, 30};
+  i = 0;
+  while (row.size() != 0) {
+    auto col = row["waveform"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 3);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat64);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 10);
+  iter->Stop();
+
+  // Case 3: int16 input of shape {3, 4, 5}, n_mels = 4, n_stft = 128; the output is expected as float32.
+  std::shared_ptr<SchemaObj> schema3 = Schema();
+  ASSERT_OK(schema3->add_column("waveform", mindspore::DataType::kNumberTypeInt16, {3, 4, 5}));
+  ds = RandomData(10, schema3);
+  EXPECT_NE(ds, nullptr);
+  auto inverse_mel_scale_op3 = audio::InverseMelScale(128, 4, 16000, 0, 8000, 100);
+  ds = ds->Map({inverse_mel_scale_op3});
+  EXPECT_NE(ds, nullptr);
+  iter = ds->CreateIterator();
+  ASSERT_NE(iter, nullptr);
+  ASSERT_OK(iter->GetNextRow(&row));
+  expected = {3, 128, 5};
+  i = 0;
+  while (row.size() != 0) {
+    auto col = row["waveform"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 3);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 10);
+  iter->Stop();
+
+  // Case 4: 2-D int16 input of shape {4, 20}, n_mels = 4, n_stft = 20; the output is expected as {1, 20, 20}.
+  std::shared_ptr<SchemaObj> schema4 = Schema();
+  ASSERT_OK(schema4->add_column("waveform", mindspore::DataType::kNumberTypeInt16, {4, 20}));
+  ds = RandomData(10, schema4);
+  EXPECT_NE(ds, nullptr);
+  auto inverse_mel_scale_op4 = audio::InverseMelScale(20, 4, 16000, 0, 8000, 100);
+  ds = ds->Map({inverse_mel_scale_op4});
+  EXPECT_NE(ds, nullptr);
+  iter = ds->CreateIterator();
+  ASSERT_NE(iter, nullptr);
+  ASSERT_OK(iter->GetNextRow(&row));
+  expected = {1, 20, 20};
+  i = 0;
+  while (row.size() != 0) {
+    auto col = row["waveform"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 3);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 10);
+  iter->Stop();
+}
+
+/// Feature: InverseMelScale
+/// Description: test WrongArg of InverseMelScale
+/// Expectation: return error
+TEST_F(MindDataTestPipeline, TestInverseMelScaleWrongArgs) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestInverseMelScaleWrongArgs.";
+  // Positional argument order of audio::InverseMelScale:
+  //   (n_stft, n_mels, sample_rate, f_min, f_max, max_iter, tolerance_loss, tolerance_change, ...).
+  // NOTE(review): every case below maps onto the same `ds`, which already carries the invalid op(s) of the
+  // previous cases, so from the second case on CreateIterator() may return nullptr because of an earlier
+  // op rather than the one under test - consider mapping each case onto a fresh dataset.
+
+  // Case 1: f_min == f_max (both -100); f_max must be greater than f_min.
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {3, 4, 5}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  EXPECT_NE(ds, nullptr);
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+  auto inverse_mel_scale_op = audio::InverseMelScale(128, 4, 1000, -100, -100);
+  ds = ds->Map({inverse_mel_scale_op});
+  EXPECT_NE(ds, nullptr);
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_EQ(iter, nullptr);
+
+  // Case 2: negative n_stft (-128); n_stft must be greater than 0.
+  inverse_mel_scale_op = audio::InverseMelScale(-128, 16000, 1000, 10, 100);
+  ds = ds->Map({inverse_mel_scale_op});
+  EXPECT_NE(ds, nullptr);
+  iter = ds->CreateIterator();
+  EXPECT_EQ(iter, nullptr);
+
+  // Case 3: negative n_mels (-16000); n_mels must be greater than 0.
+  inverse_mel_scale_op = audio::InverseMelScale(128, -16000, 1000, 10, 100);
+  ds = ds->Map({inverse_mel_scale_op});
+  EXPECT_NE(ds, nullptr);
+  iter = ds->CreateIterator();
+  EXPECT_EQ(iter, nullptr);
+
+  // Case 4: negative max_iter (-10); max_iter must be greater than 0.
+  inverse_mel_scale_op = audio::InverseMelScale(128, 16000, 1000, 10, 100, -10);
+  ds = ds->Map({inverse_mel_scale_op});
+  EXPECT_NE(ds, nullptr);
+  iter = ds->CreateIterator();
+  EXPECT_EQ(iter, nullptr);
+
+  // Case 5: negative tolerance_loss (-10).
+  inverse_mel_scale_op = audio::InverseMelScale(128, 16000, 1000, 10, 100, 10, -10);
+  ds = ds->Map({inverse_mel_scale_op});
+  EXPECT_NE(ds, nullptr);
+  iter = ds->CreateIterator();
+  EXPECT_EQ(iter, nullptr);
+
+  // Case 6: negative tolerance_change (-10).
+  inverse_mel_scale_op = audio::InverseMelScale(128, 16000, 1000, 10, 100, 10, 10, -10);
+  ds = ds->Map({inverse_mel_scale_op});
+  EXPECT_NE(ds, nullptr);
+  iter = ds->CreateIterator();
+  EXPECT_EQ(iter, nullptr);
+}
+
 /// Feature: MelscaleFbanks.
 /// Description: Test normal operation.
 /// Expectation: As expected.
--- a/tests/ut/cpp/dataset/execute_test.cc
+++ b/tests/ut/cpp/dataset/execute_test.cc
@ -1000,6 +1000,29 @@ TEST_F(MindDataTestExecute, TestHighpassBiquadParamCheckSampleRate) {
  ASSERT_FALSE(rc.IsOk());
 }

+// Feature: InverseMelScale
+// Description: test InverseMelScale in eager mode
+// Expectation: the data is processed successfully
+TEST_F(MindDataTestExecute, TestInverseMelScale) {
+  MS_LOG(INFO) << "Doing MindDataTestExecute-TestInverseMelScale.";
+  // 24 input values, reshaped to {2, 2, 3, 2} below.
+  std::vector<float> input_values = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input_tensor;
+  ASSERT_OK(Tensor::CreateFromVector(input_values, TensorShape({2, 2, 3, 2}), &input_tensor));
+  auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input_tensor));
+  // n_stft = 20, n_mels = 3, sample_rate = 16000, f_min = 0, f_max = 8000, max_iter = 10.
+  std::shared_ptr<TensorTransform> inverse_mel_scale =
+    std::make_shared<audio::InverseMelScale>(20, 3, 16000, 0, 8000, 10);
+  // Run the transform eagerly, writing the result back into the input tensor.
+  mindspore::dataset::Execute transform({inverse_mel_scale});
+  Status rc = transform(input_ms, &input_ms);
+  EXPECT_TRUE(rc.IsOk());
+}
+
 TEST_F(MindDataTestExecute, TestMuLawDecodingEager) {
  MS_LOG(INFO) << "Doing MindDataTestExecute-TestMuLawDecodingEager.";
  // testing
--- a/tests/ut/data/dataset/audiorecord/inverse_mel_scale_20x40_out.npy
+++ b/tests/ut/data/dataset/audiorecord/inverse_mel_scale_20x40_out.npy
--- a/tests/ut/data/dataset/audiorecord/inverse_mel_scale_32x81.npy
+++ b/tests/ut/data/dataset/audiorecord/inverse_mel_scale_32x81.npy
--- a/tests/ut/data/dataset/audiorecord/inverse_mel_scale_40x160_out.npy
+++ b/tests/ut/data/dataset/audiorecord/inverse_mel_scale_40x160_out.npy
--- a/tests/ut/data/dataset/audiorecord/inverse_mel_scale_40x80_out.npy
+++ b/tests/ut/data/dataset/audiorecord/inverse_mel_scale_40x80_out.npy
--- a/tests/ut/data/dataset/audiorecord/inverse_mel_scale_4x160.npy
+++ b/tests/ut/data/dataset/audiorecord/inverse_mel_scale_4x160.npy
--- a/tests/ut/data/dataset/audiorecord/inverse_mel_scale_4x80.npy
+++ b/tests/ut/data/dataset/audiorecord/inverse_mel_scale_4x80.npy
--- a/tests/ut/data/dataset/audiorecord/inverse_mel_scale_80x81_out.npy
+++ b/tests/ut/data/dataset/audiorecord/inverse_mel_scale_80x81_out.npy
--- a/tests/ut/data/dataset/audiorecord/inverse_mel_scale_8x40.npy
+++ b/tests/ut/data/dataset/audiorecord/inverse_mel_scale_8x40.npy
--- a/tests/ut/python/dataset/test_inverse_mel_scale.py
+++ b/tests/ut/python/dataset/test_inverse_mel_scale.py
@ -0,0 +1,155 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+Testing InverseMelScale op in DE
+"""
+import numpy as np
+import pytest
+
+import mindspore.dataset as ds
+import mindspore.dataset.audio.transforms as c_audio
+from mindspore import log as logger
+from mindspore.dataset.audio.utils import MelType, NormType
+
+DATA_DIR = "../data/dataset/audiorecord/"
+
+
+def get_ratio(mat):
+    """Return the sum of all elements of `mat` divided by its number of elements."""
+    total = mat.sum()
+    count = mat.size
+    return total / count
+
+
+def test_inverse_mel_scale_pipeline():
+    """
+    Feature: InverseMelScale
+    Description: test InverseMelScale cpp op in pipeline
+    Expectation: equal results from Mindspore and benchmark
+    """
+    column = "multi_dimensional_data"
+    # Each case: (input file, expected output file, keyword arguments for InverseMelScale).
+    cases = [
+        ("inverse_mel_scale_8x40.npy", 'inverse_mel_scale_20x40_out.npy',
+         {"n_stft": 20, "n_mels": 8, "sample_rate": 8000, "sgdargs": {'sgd_lr': 0.05, 'sgd_momentum': 0.9}}),
+        ("inverse_mel_scale_4x80.npy", 'inverse_mel_scale_40x80_out.npy',
+         {"n_stft": 40, "n_mels": 4, "sgdargs": {'sgd_lr': 0.01, 'sgd_momentum': 0.9}}),
+        ("inverse_mel_scale_4x160.npy", 'inverse_mel_scale_40x160_out.npy',
+         {"n_stft": 40, "n_mels": 4, "f_min": 10, "sgdargs": {'sgd_lr': 0.1, 'sgd_momentum': 0.8}}),
+    ]
+
+    for in_file, out_file, op_kwargs in cases:
+        # A leading axis is added so that the dataset yields the whole array as a single row.
+        in_data = np.load(DATA_DIR + in_file)[np.newaxis, :]
+        out_expect = np.load(DATA_DIR + out_file)[np.newaxis, :]
+        dataset = ds.NumpySlicesDataset(in_data, column_names=[column], shuffle=False)
+        transforms = [c_audio.InverseMelScale(**op_kwargs)]
+        dataset = dataset.map(operations=transforms, input_columns=[column])
+        for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
+            out_data = item[column]
+            # epsilon keeps the denominator away from an exact zero.
+            epsilon = 1e-60
+            relative_diff = np.abs((out_data - out_expect) / (out_expect + epsilon))
+            assert get_ratio(relative_diff < 1e-1) > 1e-2
+
+
+def test_inverse_mel_scale_pipeline_invalid_param():
+    """
+    Feature: InverseMelScale
+    Description: test InverseMelScale with invalid input parameters
+    Expectation: throw ValueError or TypeError
+    """
+    # NOTE(review): the log text below says "default values", but this test exercises invalid arguments.
+    logger.info("test InverseMelScale op with default values")
+    in_data = np.load(DATA_DIR + "inverse_mel_scale_32x81.npy")[np.newaxis, :]
+    data1 = ds.GeneratorDataset(in_data, column_names=["multi_dimensional_data"])
+    # The expected errors presumably come from the argument validator applied to InverseMelScale.__init__,
+    # i.e. from the constructor call itself; in that case the statements following the constructor inside
+    # each `with` block are never reached.
+    # f_min and f_max: equal values are rejected
+    with pytest.raises(ValueError,
+                       match="MelScale: f_max should be greater than f_min."):
+        transforms = [c_audio.InverseMelScale(n_mels=20, n_stft=128, sample_rate=16200, f_min=1000, f_max=1000)]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
+            _ = item["multi_dimensional_data"]
+    # n_mels: negative value
+    with pytest.raises(ValueError, match=r"Input n_mels is not within the required interval of \[1, 2147483647\]."):
+        transforms = [c_audio.InverseMelScale(n_mels=-1, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000)]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    # sample_rate: zero
+    with pytest.raises(ValueError,
+                       match=r"Input sample_rate is not within the required interval of \[1, 2147483647\]."):
+        transforms = [c_audio.InverseMelScale(n_mels=128, n_stft=2000, sample_rate=0, f_min=10, f_max=1000)]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    # f_max: negative value
+    with pytest.raises(ValueError, match=r"Input f_max is not within the required interval of \(0, 16777216\]."):
+        transforms = [c_audio.InverseMelScale(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=-10)]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    # norm: a str instead of a NormType
+    with pytest.raises(TypeError, match=r"Argument norm with value slaney is not of type \[<enum 'NormType'>\], " +
+                       "but got <class 'str'>."):
+        transforms = [c_audio.InverseMelScale(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10,
+                                              f_max=1000, norm="slaney", mel_type=MelType.SLANEY)]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    # mel_type: a str instead of a MelType
+    with pytest.raises(TypeError, match=r"Argument mel_type with value SLANEY is not of type \[<enum 'MelType'>\], " +
+                       "but got <class 'str'>."):
+        transforms = [c_audio.InverseMelScale(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
+                                              norm=NormType.NONE, mel_type="SLANEY")]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    # max_iter: negative value
+    with pytest.raises(ValueError, match=r"Input max_iter is not within the required interval of \[1, 2147483647\]."):
+        transforms = [c_audio.InverseMelScale(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
+                                              norm=NormType.NONE, mel_type=MelType.SLANEY, max_iter=-10)]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    # tolerance_loss: negative value
+    with pytest.raises(ValueError,
+                       match=r"Input tolerance_loss is not within the required interval of \(0, 16777216\]."):
+        transforms = [c_audio.InverseMelScale(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
+                                              norm=NormType.NONE, mel_type=MelType.SLANEY, tolerance_loss=-10)]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    # tolerance_change: negative value
+    with pytest.raises(ValueError,
+                       match=r"Input tolerance_change is not within the required interval of \(0, 16777216\]."):
+        transforms = [c_audio.InverseMelScale(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
+                                              norm=NormType.NONE, mel_type=MelType.SLANEY, tolerance_change=-10)]
+        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+
+
+def test_inverse_mel_scale_eager():
+    """
+    Feature: InverseMelScale
+    Description: test InverseMelScale cpp op with eager mode
+    Expectation: equal results from Mindspore and benchmark
+    """
+    mel_spectrogram = np.load(DATA_DIR + 'inverse_mel_scale_32x81.npy')
+    inverse_mel_scale = c_audio.InverseMelScale(n_stft=80, n_mels=32)
+    actual = inverse_mel_scale(mel_spectrogram)
+    expected = np.load(DATA_DIR + 'inverse_mel_scale_80x81_out.npy')
+
+    # epsilon keeps the denominator away from an exact zero.
+    epsilon = 1e-60
+    relative_diff = np.abs((actual - expected) / (expected + epsilon))
+    # Share of elements within 10% and within 0.1% of the benchmark.
+    share_within_10_percent = get_ratio(relative_diff < 1e-1)
+    assert share_within_10_percent > 1e-2
+    share_within_01_percent = get_ratio(relative_diff < 1e-3)
+    assert share_within_01_percent > 1e-3
+
+
+# Allow running this file directly as a script: execute every test in order.
+if __name__ == "__main__":
+    test_inverse_mel_scale_pipeline()
+    test_inverse_mel_scale_pipeline_invalid_param()
+    test_inverse_mel_scale_eager()