[assistant][ops][I5EWI6] Add new data operator MFCC.

2022-12-08 16:59:22 +08:00 · 2022-12-08 16:59:22 +08:00 · ef952d9977
parent d870c9090c
commit ef952d9977
21 changed files with 1183 additions and 4 deletions
--- a/docs/api/api_python/dataset_audio/mindspore.dataset.audio.MFCC.rst
+++ b/docs/api/api_python/dataset_audio/mindspore.dataset.audio.MFCC.rst
@ -0,0 +1,27 @@
+mindspore.dataset.audio.MFCC
+============================
+
+.. py:class:: mindspore.dataset.audio.MFCC(sample_rate=16000, n_mfcc=40, dct_type=2, norm=NormMode.ORTHO, log_mels=False, melkwargs=None)
+
+    计算音频信号的梅尔频率倒谱系数。
+
+    参数：
+        - **sample_rate** (int, 可选) - 采样频率（单位：Hz），不能小于零。默认值：16000。
+        - **n_mfcc** (int, 可选) - 要保留的梅尔频率倒谱系数的数量，不能小于零。默认值：40。
+        - **dct_type** (int, 可选) - 要使用的离散余弦变换（DCT）类型，只能为2。默认值：2。
+        - **norm** (NormMode, 可选) - 要使用的标准化类型。默认值：NormMode.ORTHO。
+        - **log_mels** (bool, 可选) - 是否使用梅尔对数谱图而不是分贝刻度。默认：False。
+        - **melkwargs** (dict, 可选) - 梅尔频谱的参数，如果为None则使用默认参数。默认：None，会被设置为
+          `{'n_fft': 400, 'win_length': n_fft, 'hop_length': win_length // 2, 'f_min' : 0.0, 'f_max' : sample_rate // 2,
+            'pad': 0, 'window': WindowType.HANN, 'power': 2.0, 'normalized': False, 'center': True, 'pad_mode': BorderType.REFLECT,
+            'onesided': True, 'norm' : NormType.NONE, 'mel_scale' : MelType.HTK}` 。
+
+    异常：
+        - **TypeError** - 如果 `sample_rate` 的类型不为int。
+        - **TypeError** - 如果 `log_mels` 的类型不为bool。
+        - **TypeError** - 如果 `norm` 的类型不为 :class:`mindspore.dataset.audio.utils.NormMode` 。
+        - **TypeError** - 如果 `n_mfcc` 的类型不为int。
+        - **TypeError** - 如果 `melkwargs` 的类型不为dict。
+        - **ValueError** - 如果 `sample_rate` 为负数。
+        - **ValueError** - 如果 `n_mfcc` 为负数。
+        - **ValueError** - 如果 `dct_type` 不为2。
--- a/docs/api/api_python/mindspore.dataset.transforms.rst
+++ b/docs/api/api_python/mindspore.dataset.transforms.rst
@ -388,6 +388,7 @@ API样例中常用的导入模块如下：
    mindspore.dataset.audio.MaskAlongAxis
    mindspore.dataset.audio.MaskAlongAxisIID
    mindspore.dataset.audio.MelScale
+    mindspore.dataset.audio.MFCC
    mindspore.dataset.audio.MuLawDecoding
    mindspore.dataset.audio.MuLawEncoding
    mindspore.dataset.audio.Overdrive
--- a/docs/api/api_python_en/mindspore.dataset.transforms.rst
+++ b/docs/api/api_python_en/mindspore.dataset.transforms.rst
@ -245,6 +245,7 @@ Transforms
    mindspore.dataset.audio.MaskAlongAxis
    mindspore.dataset.audio.MaskAlongAxisIID
    mindspore.dataset.audio.MelScale
+    mindspore.dataset.audio.MFCC
    mindspore.dataset.audio.MuLawDecoding
    mindspore.dataset.audio.MuLawEncoding
    mindspore.dataset.audio.Overdrive
--- a/mindspore/ccsrc/minddata/dataset/api/audio.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/audio.cc
@ -48,6 +48,7 @@
 #include "minddata/dataset/audio/ir/kernels/mask_along_axis_iid_ir.h"
 #include "minddata/dataset/audio/ir/kernels/mask_along_axis_ir.h"
 #include "minddata/dataset/audio/ir/kernels/mel_scale_ir.h"
+#include "minddata/dataset/audio/ir/kernels/mfcc_ir.h"
 #include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h"
 #include "minddata/dataset/audio/ir/kernels/mu_law_encoding_ir.h"
 #include "minddata/dataset/audio/ir/kernels/overdrive_ir.h"
@ -710,6 +711,71 @@ Status MelscaleFbanks(MSTensor *output, int32_t n_freqs, float f_min, float f_ma
  return Status::OK();
 }

+// MFCC Transform Operation.
+// Parameter bundle for the public MFCC transform. The constructor copies every argument into the
+// member of the same name, and Parse() forwards the members unchanged to MFCCOperation.
+// `norm_` holds the NormMode argument, `norm_mel_` holds the NormType argument.
+struct MFCC::Data {
+  Data(int32_t sample_rate, int32_t n_mfcc, int32_t dct_type, NormMode norm, bool log_mels, int32_t n_fft,
+       int32_t win_length, int32_t hop_length, float f_min, float f_max, int32_t pad, int32_t n_mels, WindowType window,
+       float power, bool normalized, bool center, BorderType pad_mode, bool onesided, NormType norm_mel,
+       MelType mel_scale)
+      : sample_rate_(sample_rate),
+        n_mfcc_(n_mfcc),
+        dct_type_(dct_type),
+        norm_(norm),
+        log_mels_(log_mels),
+        n_fft_(n_fft),
+        win_length_(win_length),
+        hop_length_(hop_length),
+        f_min_(f_min),
+        f_max_(f_max),
+        pad_(pad),
+        n_mels_(n_mels),
+        window_(window),
+        power_(power),
+        normalized_(normalized),
+        center_(center),
+        pad_mode_(pad_mode),
+        onesided_(onesided),
+        norm_mel_(norm_mel),
+        mel_scale_(mel_scale) {}
+  int32_t sample_rate_;
+  int32_t n_mfcc_;
+  int32_t dct_type_;
+  NormMode norm_;
+  bool log_mels_;
+  int32_t n_fft_;
+  int32_t win_length_;
+  int32_t hop_length_;
+  float f_min_;
+  float f_max_;
+  int32_t pad_;
+  int32_t n_mels_;
+  WindowType window_;
+  float power_;
+  bool normalized_;
+  bool center_;
+  BorderType pad_mode_;
+  bool onesided_;
+  NormType norm_mel_;
+  MelType mel_scale_;
+};
+
+// Public constructor: packs all arguments, in declaration order, into the shared Data holder.
+MFCC::MFCC(int32_t sample_rate, int32_t n_mfcc, int32_t dct_type, NormMode norm, bool log_mels, int32_t n_fft,
+           int32_t win_length, int32_t hop_length, float f_min, float f_max, int32_t pad, int32_t n_mels,
+           WindowType window, float power, bool normalized, bool center, BorderType pad_mode, bool onesided,
+           NormType norm_mel, MelType mel_scale)
+    : data_(std::make_shared<Data>(
+        // Cepstrum settings.
+        sample_rate, n_mfcc, dct_type, norm, log_mels,
+        // Mel spectrogram settings.
+        n_fft, win_length, hop_length, f_min, f_max, pad, n_mels, window, power, normalized, center, pad_mode,
+        onesided, norm_mel, mel_scale)) {}
+
+// Build the IR node for this transform from the stored parameters.
+std::shared_ptr<TensorOperation> MFCC::Parse() {
+  const Data &params = *data_;
+  return std::make_shared<MFCCOperation>(params.sample_rate_, params.n_mfcc_, params.dct_type_, params.norm_,
+                                         params.log_mels_, params.n_fft_, params.win_length_, params.hop_length_,
+                                         params.f_min_, params.f_max_, params.pad_, params.n_mels_, params.window_,
+                                         params.power_, params.normalized_, params.center_, params.pad_mode_,
+                                         params.onesided_, params.norm_mel_, params.mel_scale_);
+}
+
 // MuLawDecoding Transform Operation.
 struct MuLawDecoding::Data {
  explicit Data(int32_t quantization_channels) : quantization_channels_(quantization_channels) {}
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc
@ -52,6 +52,7 @@
 #include "minddata/dataset/audio/ir/kernels/mask_along_axis_iid_ir.h"
 #include "minddata/dataset/audio/ir/kernels/mask_along_axis_ir.h"
 #include "minddata/dataset/audio/ir/kernels/mel_scale_ir.h"
+#include "minddata/dataset/audio/ir/kernels/mfcc_ir.h"
 #include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h"
 #include "minddata/dataset/audio/ir/kernels/mu_law_encoding_ir.h"
 #include "minddata/dataset/audio/ir/kernels/overdrive_ir.h"
@ -479,6 +480,31 @@ PYBIND_REGISTER(MelScaleOperation, 1, ([](const py::module *m) {
                      }));
                }));

+PYBIND_REGISTER(MFCCOperation, 1, ([](const py::module *m) {
+                  // Python-side constructor: scalar mel-spectrogram settings are read out of the `melkwargs`
+                  // dict, enum-typed settings arrive as separate arguments. The node is validated before it is
+                  // handed back to Python.
+                  (void)py::class_<audio::MFCCOperation, TensorOperation, std::shared_ptr<audio::MFCCOperation>>(
+                    *m, "MFCCOperation")
+                    .def(py::init([](int32_t sample_rate, int32_t n_mfcc, int32_t dct_type, NormMode norm,
+                                     bool log_mels, const py::dict &melkwargs, WindowType window, BorderType pad_mode,
+                                     NormType norm_mel, MelType mel_scale) {
+                      // Keys are read in a fixed order so a missing key is reported the same way every time.
+                      const int32_t n_fft = py::cast<int>(melkwargs["n_fft"]);
+                      const int32_t win_length = py::cast<int>(melkwargs["win_length"]);
+                      const int32_t hop_length = py::cast<int>(melkwargs["hop_length"]);
+                      const float f_min = py::cast<float>(melkwargs["f_min"]);
+                      const float f_max = py::cast<float>(melkwargs["f_max"]);
+                      const int32_t pad = py::cast<int>(melkwargs["pad"]);
+                      const int32_t n_mels = py::cast<int>(melkwargs["n_mels"]);
+                      const float power = py::cast<float>(melkwargs["power"]);
+                      const bool normalized = py::cast<bool>(melkwargs["normalized"]);
+                      const bool center = py::cast<bool>(melkwargs["center"]);
+                      const bool onesided = py::cast<bool>(melkwargs["onesided"]);
+                      auto operation = std::make_shared<audio::MFCCOperation>(
+                        sample_rate, n_mfcc, dct_type, norm, log_mels, n_fft, win_length, hop_length, f_min, f_max, pad,
+                        n_mels, window, power, normalized, center, pad_mode, onesided, norm_mel, mel_scale);
+                      THROW_IF_ERROR(operation->ValidateParams());
+                      return operation;
+                    }));
+                }));
+
 PYBIND_REGISTER(
  MuLawDecodingOperation, 1, ([](const py::module *m) {
    (void)py::class_<audio::MuLawDecodingOperation, TensorOperation, std::shared_ptr<audio::MuLawDecodingOperation>>(
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
@ -34,6 +34,7 @@ add_library(audio-ir-kernels OBJECT
        mask_along_axis_iid_ir.cc
        mask_along_axis_ir.cc
        mel_scale_ir.cc
+        mfcc_ir.cc
        mu_law_decoding_ir.cc
        mu_law_encoding_ir.cc
        overdrive_ir.cc
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/mfcc_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/mfcc_ir.cc
@ -0,0 +1,127 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "minddata/dataset/audio/ir/kernels/mfcc_ir.h"
+
+#include "minddata/dataset/audio/ir/validators.h"
+#include "minddata/dataset/audio/kernels/audio_utils.h"
+#include "minddata/dataset/audio/kernels/mfcc_op.h"
+
+namespace mindspore {
+namespace dataset {
+namespace audio {
+// Stores every argument unchanged; the values are checked later by ValidateParams().
+MFCCOperation::MFCCOperation(int32_t sample_rate, int32_t n_mfcc, int32_t dct_type, NormMode norm, bool log_mels,
+                             int32_t n_fft, int32_t win_length, int32_t hop_length, float f_min, float f_max,
+                             int32_t pad, int32_t n_mels, WindowType window, float power, bool normalized, bool center,
+                             BorderType pad_mode, bool onesided, NormType norm_mel, MelType mel_scale)
+    // Cepstrum settings.
+    : sample_rate_(sample_rate), n_mfcc_(n_mfcc), dct_type_(dct_type), norm_(norm), log_mels_(log_mels),
+      // Framing and frequency range.
+      n_fft_(n_fft), win_length_(win_length), hop_length_(hop_length), f_min_(f_min), f_max_(f_max),
+      // Spectrogram settings.
+      pad_(pad), n_mels_(n_mels), window_(window), power_(power), normalized_(normalized), center_(center),
+      pad_mode_(pad_mode), onesided_(onesided),
+      // Mel filter bank settings.
+      norm_mel_(norm_mel), mel_scale_(mel_scale) {}
+
+// Defaulted destructor, defined out of line.
+MFCCOperation::~MFCCOperation() = default;
+
+// Returns the operation name constant kMFCCOperation as a std::string.
+std::string MFCCOperation::Name() const {
+  return std::string(kMFCCOperation);
+}
+
+// Checks all MFCC and mel-spectrogram parameters and returns the first failure found.
+// The order of checks is significant: it decides which error a caller sees when several
+// parameters are invalid at once.
+Status MFCCOperation::ValidateParams() {
+  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("MFCC", "sample_rate", sample_rate_));
+  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("MFCC", "n_mfcc", n_mfcc_));
+  CHECK_FAIL_RETURN_UNEXPECTED(dct_type_ == TWO,
+                               "MFCC: dct_type must be equal to 2, but got: " + std::to_string(dct_type_));
+  RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("MFCC", "f_max", f_max_));
+  CHECK_FAIL_RETURN_UNEXPECTED(n_mfcc_ <= n_mels_,
+                               "MFCC: n_mels should be greater than or equal to n_mfcc, but got n_mfcc: " +
+                                 std::to_string(n_mfcc_) + " and n_mels: " + std::to_string(n_mels_));
+  // MelSpectrogram params
+  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("MFCC", "n_mels", n_mels_));
+  RETURN_IF_NOT_OK(ValidateIntScalarPositive("MFCC", "n_fft", n_fft_));
+  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("MFCC", "win_length", win_length_));
+  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("MFCC", "hop_length", hop_length_));
+  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("MFCC", "pad", pad_));
+  // power_ is a float; the int validator would truncate it, so e.g. 0.5 would be rejected as 0.
+  RETURN_IF_NOT_OK(ValidateFloatScalarPositive("MFCC", "power", power_));
+  CHECK_FAIL_RETURN_UNEXPECTED(pad_mode_ != BorderType::kEdge, "MFCC: invalid BorderType, kEdge is not supported.");
+  if (f_max_ != 0) {
+    // f_max_ was already checked to be non-negative above; only its relation to f_min_ remains.
+    CHECK_FAIL_RETURN_UNEXPECTED(f_min_ <= f_max_,
+                                 "MFCC: f_max must be greater than or equal to f_min, but got "
+                                 "f_max: " +
+                                   std::to_string(f_max_) + " and f_min: " + std::to_string(f_min_));
+  } else {
+    // f_max_ == 0 is replaced in Build() by a value derived from sample_rate_, so compare against that bound.
+    CHECK_FAIL_RETURN_UNEXPECTED(f_min_ < (sample_rate_ * HALF),
+                                 "MFCC: f_min must be less than half of sample_rate when f_max is 0, but got"
+                                 " f_min: " +
+                                   std::to_string(f_min_));
+  }
+  CHECK_FAIL_RETURN_UNEXPECTED(win_length_ <= n_fft_,
+                               "MFCC: win_length must be less than or equal to n_fft, but got win_length: " +
+                                 std::to_string(win_length_) + ", n_fft: " + std::to_string(n_fft_));
+  return Status::OK();
+}
+
+// Creates the runtime MFCCOp. A value of 0 for win_length, hop_length or f_max is first replaced by a
+// value derived from the other parameters; the replacements are written back to the members.
+std::shared_ptr<TensorOp> MFCCOperation::Build() {
+  if (win_length_ == 0) {
+    win_length_ = n_fft_;
+  }
+  if (hop_length_ == 0) {
+    hop_length_ = win_length_ / TWO;
+  }
+  if (f_max_ == 0) {
+    // Integer division first, then conversion to float.
+    f_max_ = static_cast<float>(sample_rate_ / TWO);
+  }
+  // MFCCOp takes the mel norm (norm_mel_) before the DCT norm (norm_).
+  return std::make_shared<MFCCOp>(sample_rate_, n_mfcc_, dct_type_, log_mels_, n_fft_, win_length_, hop_length_,
+                                  f_min_, f_max_, pad_, n_mels_, window_, power_, normalized_, center_, pad_mode_,
+                                  onesided_, norm_mel_, norm_, mel_scale_);
+}
+
+// Serializes all constructor parameters into a JSON object and stores it in *out_json.
+// Returns an error instead of dereferencing a null out_json.
+Status MFCCOperation::to_json(nlohmann::json *out_json) {
+  RETURN_UNEXPECTED_IF_NULL(out_json);
+  nlohmann::json args;
+  args["sample_rate"] = sample_rate_;
+  args["n_mfcc"] = n_mfcc_;
+  args["dct_type"] = dct_type_;
+  args["norm"] = norm_;
+  args["log_mels"] = log_mels_;
+  args["n_fft"] = n_fft_;
+  args["win_length"] = win_length_;
+  args["hop_length"] = hop_length_;
+  args["f_min"] = f_min_;
+  args["f_max"] = f_max_;
+  args["pad"] = pad_;
+  args["n_mels"] = n_mels_;
+  args["window"] = window_;
+  args["power"] = power_;
+  args["normalized"] = normalized_;
+  args["center"] = center_;
+  args["pad_mode"] = pad_mode_;
+  args["onesided"] = onesided_;
+  args["norm_mel"] = norm_mel_;
+  args["mel_scale"] = mel_scale_;
+  *out_json = args;
+  return Status::OK();
+}
+}  // namespace audio
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/mfcc_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/mfcc_ir.h
@ -0,0 +1,96 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_MFCC_IR_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_MFCC_IR_H_
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "include/api/status.h"
+#include "minddata/dataset/include/dataset/constants.h"
+#include "minddata/dataset/kernels/ir/tensor_operation.h"
+
+namespace mindspore {
+namespace dataset {
+namespace audio {
+/// Name string returned by MFCCOperation::Name().
+constexpr char kMFCCOperation[] = "MFCC";
+
+/// \brief IR node for the MFCC audio transform: stores the parameters, validates them and builds the runtime op.
+class MFCCOperation : public TensorOperation {
+ public:
+  /// \brief Constructor.
+  /// \param[in] sample_rate Sample rate of audio signal.
+  /// \param[in] n_mfcc Number of mfc coefficients to retain.
+  /// \param[in] dct_type Type of DCT (discrete cosine transform) to use.
+  /// \param[in] norm Normalization mode used for the DCT matrix.
+  /// \param[in] log_mels Whether to use log-mel spectrograms instead of db-scaled.
+  /// \param[in] n_fft Size of FFT, creates n_fft // 2 + 1 bins.
+  /// \param[in] win_length Window size.
+  /// \param[in] hop_length Length of hop between STFT windows.
+  /// \param[in] f_min Minimum frequency.
+  /// \param[in] f_max Maximum frequency.
+  /// \param[in] pad Two sided padding of signal.
+  /// \param[in] n_mels Number of mel filterbanks.
+  /// \param[in] window A function to create a window tensor that is applied/multiplied to each frame/window.
+  /// \param[in] power Exponent for the magnitude spectrogram, (must be > 0) e.g., 1 for energy, 2 for power, etc.
+  /// \param[in] normalized Whether to normalize by magnitude after stft.
+  /// \param[in] center Whether to pad waveform on both sides.
+  /// \param[in] pad_mode Controls the padding method used when center is True.
+  /// \param[in] onesided Controls whether to return half of results to avoid redundancy.
+  /// \param[in] norm_mel Norm of the mel filter bank. If 'slaney', divide the triangular mel weights by the width
+  ///     of the mel band (area normalization).
+  /// \param[in] mel_scale Scale to use: htk or slaney.
+  MFCCOperation(int32_t sample_rate, int32_t n_mfcc, int32_t dct_type, NormMode norm, bool log_mels, int32_t n_fft,
+                int32_t win_length, int32_t hop_length, float f_min, float f_max, int32_t pad, int32_t n_mels,
+                WindowType window, float power, bool normalized, bool center, BorderType pad_mode, bool onesided,
+                NormType norm_mel, MelType mel_scale);
+
+  /// \brief Destructor.
+  ~MFCCOperation();
+
+  /// \brief Create the runtime tensor op from the stored parameters.
+  /// \return Shared pointer to the created TensorOp.
+  std::shared_ptr<TensorOp> Build() override;
+
+  /// \brief Check the stored parameters.
+  /// \return Status return code.
+  Status ValidateParams() override;
+
+  /// \brief Get the name of this operation.
+  /// \return Operation name.
+  std::string Name() const override;
+
+  /// \brief Serialize the stored parameters to JSON.
+  /// \param[out] out_json JSON object that receives the parameters.
+  /// \return Status return code.
+  Status to_json(nlohmann::json *out_json) override;
+
+ private:
+  int32_t sample_rate_;
+  int32_t n_mfcc_;
+  int32_t dct_type_;
+  NormMode norm_;  // DCT normalization mode.
+  bool log_mels_;
+  int32_t n_fft_;
+  int32_t win_length_;
+  int32_t hop_length_;
+  float f_min_;
+  float f_max_;
+  int32_t pad_;
+  int32_t n_mels_;
+  WindowType window_;
+  float power_;
+  bool normalized_;
+  bool center_;
+  BorderType pad_mode_;
+  bool onesided_;
+  NormType norm_mel_;  // Mel filter bank norm.
+  MelType mel_scale_;
+};
+}  // namespace audio
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_MFCC_IR_H_
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
@ -35,6 +35,7 @@ add_library(audio-kernels OBJECT
        mask_along_axis_iid_op.cc
        mask_along_axis_op.cc
        mel_scale_op.cc
+        mfcc_op.cc
        mu_law_decoding_op.cc
        mu_law_encoding_op.cc
        overdrive_op.cc
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc
@ -2230,5 +2230,77 @@ Status LFCC(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu

  return Status::OK();
 }
+
+// Computes a mel spectrogram: Spectrogram() followed by MelScale<float>() with n_fft / 2 + 1 frequency bins.
+// Both pointers are checked before the input shape is read, and an input without any dimension is
+// rejected instead of indexing an empty shape vector.
+Status MelSpectrogram(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t sample_rate,
+                      int32_t n_fft, int32_t win_length, int32_t hop_length, float f_min, float f_max, int32_t pad,
+                      int32_t n_mels, WindowType window, float power, bool normalized, bool center, BorderType pad_mode,
+                      bool onesided, NormType norm, MelType mel_scale) {
+  RETURN_UNEXPECTED_IF_NULL(input);
+  RETURN_UNEXPECTED_IF_NULL(output);
+  const auto input_shape_vec = input->shape().AsVector();
+  CHECK_FAIL_RETURN_UNEXPECTED(!input_shape_vec.empty(),
+                               "MelSpectrogram: input tensor is not in shape of <..., time>.");
+  // The last dimension is the one compared against n_fft.
+  CHECK_FAIL_RETURN_UNEXPECTED(n_fft < TWO * input_shape_vec.back(),
+                               "MelSpectrogram: Padding size should be less than the corresponding input dimension.");
+  std::shared_ptr<Tensor> spectrogram;
+  RETURN_IF_NOT_OK(Spectrogram(input, &spectrogram, pad, window, n_fft, hop_length, win_length, power, normalized,
+                               center, pad_mode, onesided));
+  RETURN_IF_NOT_OK(
+    MelScale<float>(spectrogram, output, n_mels, sample_rate, f_min, f_max, n_fft / TWO + 1, norm, mel_scale));
+  return Status::OK();
+}
+
+// Computes MFCC in three stages: mel spectrogram, log or dB scaling, then a matrix product with the
+// DCT matrix. `norm` is forwarded to MelSpectrogram(); `norm_M` is forwarded to Dct().
+Status MFCC(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t sample_rate, int32_t n_mfcc,
+            int32_t dct_type, bool log_mels, int32_t n_fft, int32_t win_length, int32_t hop_length, float f_min,
+            float f_max, int32_t pad, int32_t n_mels, WindowType window, float power, bool normalized, bool center,
+            BorderType pad_mode, bool onesided, NormType norm, NormMode norm_M, MelType mel_scale) {
+  RETURN_UNEXPECTED_IF_NULL(input);
+  RETURN_UNEXPECTED_IF_NULL(output);
+  std::shared_ptr<Tensor> mel_spectrogram;
+  std::shared_ptr<Tensor> dct_mat;
+  RETURN_IF_NOT_OK(MelSpectrogram(input, &mel_spectrogram, sample_rate, n_fft, win_length, hop_length, f_min, f_max,
+                                  pad, n_mels, window, power, normalized, center, pad_mode, onesided, norm, mel_scale));
+  // DCT matrix built from n_mfcc, n_mels and the DCT normalization mode.
+  RETURN_IF_NOT_OK(Dct(&dct_mat, n_mfcc, n_mels, norm_M));
+  if (log_mels) {
+    // In-place log of every element; the small offset is added before taking the log.
+    for (auto itr = mel_spectrogram->begin<float>(); itr != mel_spectrogram->end<float>(); ++itr) {
+      float log_offset = 1e-6;
+      *itr = log(*itr + log_offset);
+    }
+  } else {
+    // dB scaling through AmplitudeToDB with fixed settings; the result replaces the mel spectrogram.
+    std::shared_ptr<Tensor> amplitude_to_db;
+    float multiplier = 10.0;
+    float db_multiplier = 0.0;
+    float amin = 1e-10;
+    float top_db = 80.0;
+    RETURN_IF_NOT_OK(AmplitudeToDB(mel_spectrogram, &amplitude_to_db, multiplier, amin, db_multiplier, top_db));
+    mel_spectrogram = amplitude_to_db;
+  }
+  // View the DCT tensor data as an (n_mfcc x n_mels) Eigen matrix without copying.
+  auto dct_mat_ptr = &*dct_mat->begin<float>();
+  Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic> mat_res;
+  Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>> matrix_dm(dct_mat_ptr, n_mfcc, n_mels);
+  // Collapse all leading dimensions into one so each slice can be processed as a 2-D matrix.
+  // st_shape keeps the shape from before the reshape; it is used for the output shape below.
+  TensorShape st_shape = mel_spectrogram->shape();
+  TensorShape st_reshape({mel_spectrogram->Size() / st_shape[-1] / st_shape[-2], st_shape[-2], st_shape[-1]});
+  RETURN_IF_NOT_OK(mel_spectrogram->Reshape(st_reshape));
+
+  const dsize_t kRowIndex = 1;
+  const dsize_t kColIndex = 2;
+  int rows = st_reshape[kRowIndex];
+  int cols = st_reshape[kColIndex];
+  std::vector<float> out_temp;
+
+  for (int c = 0; c < st_reshape[0]; c++) {
+    // Slice c starts rows * cols elements after slice c - 1 and is mapped as a (cols x rows) matrix.
+    // NOTE(review): Eigen dynamic matrices default to column-major storage, so this mapping presumably
+    // views the row-major (rows x cols) tensor data as its transpose - confirm.
+    Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>> matrix_c(
+      &*mel_spectrogram->begin<float>() + rows * cols * c, cols, rows);
+    mat_res.noalias() = (matrix_c * matrix_dm.transpose());
+    // Append the product's raw data for this slice to the flat output buffer.
+    std::vector<float> vec_c(mat_res.data(), mat_res.data() + mat_res.size());
+    out_temp.insert(out_temp.end(), vec_c.begin(), vec_c.end());
+  }
+  // unpack: keep the leading dimensions of the pre-reshape shape, set the last two to (n_mfcc, cols).
+  std::vector<int64_t> output_shape_vec = st_shape.AsVector();
+  output_shape_vec[st_shape.Size() - 1] = cols;
+  output_shape_vec[st_shape.Size() - TWO] = n_mfcc;
+  TensorShape output_shape(output_shape_vec);
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(out_temp, output_shape, output));
+
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
@ -38,6 +38,7 @@ constexpr double PI = 3.141592653589793;
 constexpr int kMinAudioDim = 1;
 constexpr int kDefaultAudioDim = 2;
 constexpr int TWO = 2;
+// Factor 0.5; MFCC parameter validation multiplies sample_rate by it to get the upper bound for f_min.
+constexpr float HALF = 0.5;

 namespace mindspore {
 namespace dataset {
@ -2140,6 +2141,60 @@ Status LFCC(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu
            int32_t n_filter, int32_t n_lfcc, int32_t dct_type, bool log_lf, int32_t n_fft, int32_t win_length,
            int32_t hop_length, float f_min, float f_max, int32_t pad, WindowType window, float power, bool normalized,
            bool center, BorderType pad_mode, bool onesided, NormMode norm);
+
+/// \brief Create MelSpectrogram for a raw audio signal.
+/// \param[in] input Input tensor.
+/// \param[out] output Output tensor.
+/// \param[in] sample_rate Sample rate of audio signal.
+/// \param[in] n_fft Size of FFT, creates n_fft // 2 + 1 bins.
+/// \param[in] win_length Window size.
+/// \param[in] hop_length Length of hop between STFT windows.
+/// \param[in] f_min Minimum frequency, which must be non negative.
+/// \param[in] f_max Maximum frequency, which must be positive.
+/// \param[in] pad Two sided padding of signal.
+/// \param[in] n_mels Number of mel filter, which must be positive.
+/// \param[in] window A function to create a window tensor that is applied/multiplied to each frame/window.
+/// \param[in] power Exponent for the magnitude spectrogram, (must be > 0) e.g., 1 for energy, 2 for power, etc.
+/// \param[in] normalized Whether to normalize by magnitude after stft.
+/// \param[in] center Whether to pad waveform on both sides.
+/// \param[in] pad_mode Controls the padding method used when center is True.
+/// \param[in] onesided Controls whether to return half of results to avoid redundancy.
+/// \param[in] norm If 'slaney', divide the triangular mel weights by the width of the mel band (area normalization).
+/// \param[in] mel_scale Scale to use: htk or slaney.
+/// \return Status return code.
+Status MelSpectrogram(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t sample_rate,
+                      int32_t n_fft, int32_t win_length, int32_t hop_length, float f_min, float f_max, int32_t pad,
+                      int32_t n_mels, WindowType window, float power, bool normalized, bool center, BorderType pad_mode,
+                      bool onesided, NormType norm, MelType mel_scale);
+
+/// \brief Create MFCC for a raw audio signal.
+/// \param[in] input Input tensor.
+/// \param[out] output Output tensor.
+/// \param[in] sample_rate Sample rate of audio signal.
+/// \param[in] n_mfcc Number of mfc coefficients to retain.
+/// \param[in] dct_type Type of DCT (discrete cosine transform) to use.
+/// \param[in] log_mels Whether to use log-mel spectrograms instead of db-scaled.
+/// \param[in] n_fft Size of FFT, creates n_fft // 2 + 1 bins.
+/// \param[in] win_length Window size.
+/// \param[in] hop_length Length of hop between STFT windows.
+/// \param[in] f_min Minimum frequency.
+/// \param[in] f_max Maximum frequency.
+/// \param[in] pad Two sided padding of signal.
+/// \param[in] n_mels Number of mel filterbanks.
+/// \param[in] window A function to create a window tensor that is applied/multiplied to each frame/window.
+/// \param[in] power Exponent for the magnitude spectrogram, (must be > 0) e.g., 1 for energy, 2 for power, etc.
+/// \param[in] normalized Whether to normalize by magnitude after stft.
+/// \param[in] center Whether to pad waveform on both sides.
+/// \param[in] pad_mode Controls the padding method used when center is True.
+/// \param[in] onesided Controls whether to return half of results to avoid redundancy.
+/// \param[in] norm Norm of the mel filter bank. If 'slaney', divide the triangular mel weights by the width of the
+///     mel band (area normalization).
+/// \param[in] norm_M Normalization mode used for the DCT matrix.
+/// \param[in] mel_scale Scale to use: htk or slaney.
+/// \return Status return code.
+Status MFCC(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t sample_rate, int32_t n_mfcc,
+            int32_t dct_type, bool log_mels, int32_t n_fft, int32_t win_length, int32_t hop_length, float f_min,
+            float f_max, int32_t pad, int32_t n_mels, WindowType window, float power, bool normalized, bool center,
+            BorderType pad_mode, bool onesided, NormType norm, NormMode norm_M, MelType mel_scale);
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/mfcc_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/mfcc_op.cc
@ -0,0 +1,57 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "minddata/dataset/audio/kernels/mfcc_op.h"
+
+#include "minddata/dataset/audio/kernels/audio_utils.h"
+#include "minddata/dataset/core/tensor.h"
+#include "minddata/dataset/kernels/data/data_utils.h"
+#include "minddata/dataset/kernels/tensor_op.h"
+#include "minddata/dataset/util/status.h"
+
+namespace mindspore {
+namespace dataset {
+// Runs the MFCC kernel on `input` with the parameters stored in this op and writes the result to `output`.
+Status MFCCOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
+  IO_CHECK(input, output);
+  RETURN_IF_NOT_OK(MFCC(input, output, sample_rate_, n_mfcc_, dct_type_, log_mels_, n_fft_, win_length_,
+                        hop_length_, f_min_, f_max_, pad_, n_mels_, window_, power_, normalized_, center_, pad_mode_,
+                        onesided_, norm_, norm_M_, mel_scale_));
+  return Status::OK();
+}
+
+// Derives the output shape from the first input shape: the last dimension is replaced by
+// the pair (n_mfcc_, last input dimension). An input shape without any dimension is rejected.
+Status MFCCOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
+  RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
+  outputs.clear();
+  auto output_shape_vector = inputs[0].AsVector();
+  if (output_shape_vector.empty()) {
+    return Status(StatusCode::kMDUnexpectedError, "MFCC: input tensor is not in shape of <..., time>.");
+  }
+  // Read the last element; indexing with size() would read one past the end of the vector.
+  const auto time = output_shape_vector.back();
+  output_shape_vector.pop_back();
+  output_shape_vector.push_back(n_mfcc_);
+  output_shape_vector.push_back(time);
+  outputs.emplace_back(TensorShape(output_shape_vector));
+  return Status::OK();
+}
+
+// Reports DE_FLOAT32 as the output type after checking that the first input type is numeric.
+Status MFCCOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) {
+  RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs));
+  const DataType &input_type = inputs[0];
+  RETURN_IF_NOT_OK(ValidateTensorType("MFCC", input_type.IsNumeric(), "[float]", input_type.ToString()));
+  outputs[0] = DataType(DataType::DE_FLOAT32);
+  return Status::OK();
+}
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/mfcc_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/mfcc_op.h
@ -0,0 +1,113 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_MFCC_OP_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_MFCC_OP_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "include/dataset/constants.h"
+#include "minddata/dataset/core/tensor.h"
+#include "minddata/dataset/kernels/tensor_op.h"
+
+namespace mindspore {
+namespace dataset {
+/// \brief TensorOp that stores the MFCC parameters and passes them to the MFCC kernel function in Compute().
+class MFCCOp : public TensorOp {
+ public:
+  /// \brief Constructor.
+  /// \param[in] sample_rate Sample rate of audio signal.
+  /// \param[in] n_mfcc Number of mfc coefficients to retain.
+  /// \param[in] dct_type Type of DCT (discrete cosine transform) to use.
+  /// \param[in] log_mels Whether to use log-mel spectrograms instead of db-scaled.
+  /// \param[in] n_fft Size of FFT, creates n_fft // 2 + 1 bins.
+  /// \param[in] win_length Window size.
+  /// \param[in] hop_length Length of hop between STFT windows.
+  /// \param[in] f_min Minimum frequency.
+  /// \param[in] f_max Maximum frequency.
+  /// \param[in] pad Two sided padding of signal.
+  /// \param[in] n_mels Number of mel filterbanks.
+  /// \param[in] window A function to create a window tensor that is applied/multiplied to each frame/window.
+  /// \param[in] power Exponent for the magnitude spectrogram, (must be > 0) e.g., 1 for energy, 2 for power, etc.
+  /// \param[in] normalized Whether to normalize by magnitude after stft.
+  /// \param[in] center Whether to pad waveform on both sides.
+  /// \param[in] pad_mode Controls the padding method used when center is True.
+  /// \param[in] onesided Controls whether to return half of results to avoid redundancy.
+  /// \param[in] norm Normalization of the mel filterbank (NormType). If 'slaney', divide the triangular mel
+  ///     weights by the width of the mel band (area normalization).
+  /// \param[in] norm_M Norm mode (NormMode) to use for the DCT, e.g. ortho.
+  /// \param[in] mel_scale Scale to use: htk or slaney.
+  MFCCOp(int32_t sample_rate, int32_t n_mfcc, int32_t dct_type, bool log_mels, int32_t n_fft, int32_t win_length,
+         int32_t hop_length, float f_min, float f_max, int32_t pad, int32_t n_mels, WindowType window, float power,
+         bool normalized, bool center, BorderType pad_mode, bool onesided, NormType norm, NormMode norm_M,
+         MelType mel_scale)
+      : sample_rate_(sample_rate),
+        n_mfcc_(n_mfcc),
+        dct_type_(dct_type),
+        log_mels_(log_mels),
+        n_fft_(n_fft),
+        win_length_(win_length),
+        hop_length_(hop_length),
+        f_min_(f_min),
+        f_max_(f_max),
+        pad_(pad),
+        n_mels_(n_mels),
+        window_(window),
+        power_(power),
+        normalized_(normalized),
+        center_(center),
+        pad_mode_(pad_mode),
+        onesided_(onesided),
+        norm_(norm),
+        norm_M_(norm_M),
+        mel_scale_(mel_scale) {}
+
+  /// \brief Destructor.
+  ~MFCCOp() override = default;
+
+  /// \brief Compute the MFCC of the input tensor with the stored parameters.
+  /// \param[in] input Input tensor.
+  /// \param[out] output Output tensor.
+  /// \return Status code.
+  Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
+
+  /// \brief Name of this op.
+  /// \return The kMFCCOp constant.
+  std::string Name() const override { return kMFCCOp; }
+
+  /// \brief Derive the output shape from the input shape.
+  /// \param[in] inputs Input shapes.
+  /// \param[out] outputs Output shapes.
+  /// \return Status code.
+  Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
+
+  /// \brief Derive the output data type from the input data type.
+  /// \param[in] inputs Input data types.
+  /// \param[out] outputs Output data types.
+  /// \return Status code.
+  Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
+
+ private:
+  // Parameters captured by the constructor, in the same order as its argument list.
+  int32_t sample_rate_;
+  int32_t n_mfcc_;
+  int32_t dct_type_;
+  bool log_mels_;
+  int32_t n_fft_;
+  int32_t win_length_;
+  int32_t hop_length_;
+  float f_min_;
+  float f_max_;
+  int32_t pad_;
+  int32_t n_mels_;
+  WindowType window_;
+  float power_;
+  bool normalized_;
+  bool center_;
+  BorderType pad_mode_;
+  bool onesided_;
+  NormType norm_;    // mel filterbank normalization
+  NormMode norm_M_;  // norm mode; NOTE(review): presumably applied to the DCT - confirm against the kernel
+  MelType mel_scale_;
+};
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_MFCC_OP_H_
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
@ -867,6 +867,52 @@ Status DATASET_API MelscaleFbanks(MSTensor *output, int32_t n_freqs, float f_min
                                  int32_t sample_rate, NormType norm = NormType::kNone,
                                  MelType mel_type = MelType::kHtk);

+/// \brief Create MFCC for a raw audio signal.
+class DATASET_API MFCC final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] sample_rate Sample rate of audio signal. Default: 16000.
+  /// \param[in] n_mfcc Number of mfc coefficients to retain. Default: 40.
+  /// \param[in] dct_type Type of DCT (discrete cosine transform) to use. Default: 2.
+  /// \param[in] norm Norm mode to use. Default: NormMode::kOrtho.
+  /// \param[in] log_mels Whether to use log-mel spectrograms instead of db-scaled. Default: false.
+  /// \param[in] n_fft Size of FFT, creates n_fft // 2 + 1 bins. Default: 400.
+  /// \param[in] win_length Window size. Default: 0.
+  /// \param[in] hop_length Length of hop between STFT windows. Default: 0.
+  /// \param[in] f_min Minimum frequency. Default: 0.
+  /// \param[in] f_max Maximum frequency. Default: 0.
+  /// \param[in] pad Two sided padding of signal. Default: 0.
+  /// \param[in] n_mels Number of mel filterbanks. Default: 128.
+  /// \param[in] window A function to create a window tensor that is applied/multiplied to each frame/window.
+  ///     Default: WindowType::kHann.
+  /// \param[in] power Exponent for the magnitude spectrogram, (must be > 0) e.g., 1 for energy, 2 for power, etc.
+  ///     Default: 2.0.
+  /// \param[in] normalized Whether to normalize by magnitude after stft. Default: false.
+  /// \param[in] center Whether to pad waveform on both sides. Default: true.
+  /// \param[in] pad_mode Controls the padding method used when center is True. Default: BorderType::kReflect.
+  /// \param[in] onesided Controls whether to return half of results to avoid redundancy. Default: true.
+  /// \param[in] norm_mel Normalization of the mel filterbank. If 'slaney', divide the triangular mel weights by
+  ///     the width of the mel band (area normalization). Default: NormType::kNone.
+  /// \param[in] mel_scale Scale to use: htk or slaney. Default: MelType::kHtk.
+  explicit MFCC(int32_t sample_rate = 16000, int32_t n_mfcc = 40, int32_t dct_type = 2,
+                NormMode norm = NormMode::kOrtho, bool log_mels = false, int32_t n_fft = 400, int32_t win_length = 0,
+                int32_t hop_length = 0, float f_min = 0, float f_max = 0, int32_t pad = 0, int32_t n_mels = 128,
+                WindowType window = WindowType::kHann, float power = 2.0, bool normalized = false, bool center = true,
+                BorderType pad_mode = BorderType::kReflect, bool onesided = true, NormType norm_mel = NormType::kNone,
+                MelType mel_scale = MelType::kHtk);
+
+  /// \brief Destructor.
+  ~MFCC() override = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  // Opaque holder for the constructor arguments; the struct is defined outside this header.
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
 /// \brief MuLawDecoding TensorTransform.
 /// \note Decode mu-law encoded signal.
 class DATASET_API MuLawDecoding final : public TensorTransform {
--- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
@ -192,6 +192,7 @@ constexpr char kMagphaseOp[] = "MagphaseOp";
 constexpr char kMaskAlongAxisIIDOp[] = "MaskAlongAxisIIDOp";
 constexpr char kMaskAlongAxisOp[] = "MaskAlongAxisOp";
 constexpr char kMelScaleOp[] = "MelScaleOp";
+constexpr char kMFCCOp[] = "MFCCOp";
 constexpr char kMuLawDecodingOp[] = "MuLawDecodingOp";
 constexpr char kMuLawEncodingOp[] = "MuLawEncodingOp";
 constexpr char kOverdriveOp[] = "OverdriveOp";
--- a/mindspore/python/mindspore/dataset/audio/init.py
+++ b/mindspore/python/mindspore/dataset/audio/init.py
@ -68,7 +68,7 @@ from mindspore.dataset.audio.transforms import AllpassBiquad, AmplitudeToDB, Ang
    BandpassBiquad, BandrejectBiquad, BassBiquad, Biquad, \
    ComplexNorm, ComputeDeltas, Contrast, DBToAmplitude, DCShift, DeemphBiquad, DetectPitchFrequency, Dither, \
    EqualizerBiquad, Fade, Flanger, FrequencyMasking, Gain, GriffinLim, HighpassBiquad, InverseMelScale, LFCC, \
-    LFilter, LowpassBiquad, Magphase, MaskAlongAxis, MaskAlongAxisIID, MelScale, MuLawDecoding, MuLawEncoding, \
+    LFilter, LowpassBiquad, Magphase, MaskAlongAxis, MaskAlongAxisIID, MelScale, MFCC, MuLawDecoding, MuLawEncoding, \
    Overdrive, Phaser, PhaseVocoder, Resample, RiaaBiquad, SlidingWindowCmn, SpectralCentroid, Spectrogram, \
    TimeMasking, TimeStretch, TrebleBiquad, Vad, Vol
 from mindspore.dataset.audio.utils import BorderType, DensityFunction, FadeShape, GainType, Interpolation, \
--- a/mindspore/python/mindspore/dataset/audio/transforms.py
+++ b/mindspore/python/mindspore/dataset/audio/transforms.py
@ -29,9 +29,10 @@ from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_
    check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \
    check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_griffin_lim, \
    check_highpass_biquad, check_inverse_mel_scale, check_lfcc, check_lfilter, check_lowpass_biquad, check_magphase, \
-    check_mask_along_axis, check_mask_along_axis_iid, check_masking, check_mel_scale, check_mu_law_coding, \
-    check_overdrive, check_phase_vocoder, check_phaser, check_resample, check_riaa_biquad, check_sliding_window_cmn, \
-    check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, check_vad, check_vol
+    check_mask_along_axis, check_mask_along_axis_iid, check_masking, check_mel_scale, check_mfcc, \
+    check_mu_law_coding, check_overdrive, check_phase_vocoder, check_phaser, check_resample, check_riaa_biquad, \
+    check_sliding_window_cmn, check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, \
+    check_vad, check_vol
 from ..transforms.py_transforms_util import Implementation
 from ..transforms.transforms import TensorOperation

@ -1516,6 +1517,83 @@ class MelScale(AudioTensorOperation):
                                     DE_C_NORM_TYPE.get(self.norm), DE_C_MEL_TYPE.get(self.mel_type))


+class MFCC(AudioTensorOperation):
+    """
+    Create MFCC for a raw audio signal.
+
+    Args:
+        sample_rate (int, optional): Sampling rate of audio signal (in Hz), can't be less than 0. Default: 16000.
+        n_mfcc (int, optional): Number of mfc coefficients to retain, can't be less than 0. Default: 40.
+        dct_type (int, optional): Type of DCT (discrete cosine transform) to use, can only be 2. Default: 2.
+        norm (NormMode, optional): Norm to use. Default: NormMode.ORTHO.
+        log_mels (bool, optional): Whether to use log-mel spectrograms instead of db-scaled. Default: False.
+        melkwargs (dict, optional): Arguments for Spectrogram. Default: None, will be set to
+            `{'n_fft': 400, 'win_length': n_fft, 'hop_length': win_length // 2, 'f_min' : 0.0,
+            'f_max' : sample_rate // 2, 'pad': 0, 'window': WindowType.HANN, 'power': 2.0, 'normalized': False,
+            'center': True, 'pad_mode': BorderType.REFLECT, 'onesided': True, 'norm' : NormType.NONE,
+            'mel_scale' : MelType.HTK}` .
+
+    Raises:
+        TypeError: If `sample_rate` is not of type int.
+        TypeError: If `log_mels` is not of type bool.
+        TypeError: If `norm` is not of type :class:`mindspore.dataset.audio.utils.NormMode` .
+        TypeError: If `n_mfcc` is not of type int.
+        TypeError: If `melkwargs` is not of type dict.
+        ValueError: If `sample_rate` is a negative number.
+        ValueError: If `n_mfcc` is a negative number.
+        ValueError: If `dct_type` is not 2.
+
+    Supported Platforms:
+        ``CPU``
+
+    Examples:
+        >>> import numpy as np
+        >>>
+        >>> waveform = np.array([[0.8236, 0.2049, 0.3335], [0.5933, 0.9911, 0.2482],
+        ...                      [0.3007, 0.9054, 0.7598], [0.5394, 0.2842, 0.5634], [0.6363, 0.2226, 0.2288]])
+        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
+        >>> transforms = [audio.MFCC(4000, 1500, 0.7)]
+        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
+    """
+
+    @check_mfcc
+    def __init__(self, sample_rate=16000, n_mfcc=40, dct_type=2, norm=NormMode.ORTHO, log_mels=False, melkwargs=None):
+        super().__init__()
+        self.sample_rate = sample_rate
+        self.n_mfcc = n_mfcc
+        self.dct_type = dct_type
+        self.norm = norm
+        self.log_mels = log_mels
+        self.melkwargs = melkwargs
+        if melkwargs is None:
+            self.melkwargs = {}
+        self.melkwargs.setdefault("n_fft", 400)
+        self.melkwargs.setdefault("win_length", self.melkwargs.get("n_fft"))
+        self.melkwargs.setdefault("hop_length", self.melkwargs.get("win_length") // 2)
+        self.melkwargs.setdefault("f_min", 0.0)
+        self.melkwargs.setdefault("f_max", sample_rate // 2)
+        self.melkwargs.setdefault("pad", 0)
+        self.melkwargs.setdefault("n_mels", 128)
+        self.melkwargs.setdefault("window", WindowType.HANN)
+        self.melkwargs.setdefault("power", 2.0)
+        self.melkwargs.setdefault("normalized", False)
+        self.melkwargs.setdefault("center", True)
+        self.melkwargs.setdefault("pad_mode", BorderType.REFLECT)
+        self.melkwargs.setdefault("onesided", True)
+        self.melkwargs.setdefault("norm", NormType.NONE)
+        self.melkwargs.setdefault("mel_scale", MelType.HTK)
+        self.window = self.melkwargs.get("window")
+        self.pad_mode = self.melkwargs.get("pad_mode")
+        self.norm_mel = self.melkwargs.get("norm")
+        self.mel_scale = self.melkwargs.get("mel_scale")
+
+    def parse(self):
+        return cde.MFCCOperation(self.sample_rate, self.n_mfcc, self.dct_type, DE_C_NORM_MODE.get(self.norm),
+                                 self.log_mels, self.melkwargs, DE_C_WINDOW_TYPE.get(self.window),
+                                 DE_C_BORDER_TYPE.get(self.pad_mode), DE_C_NORM_TYPE.get(self.norm_mel),
+                                 DE_C_MEL_TYPE.get(self.mel_scale))
+
+
 class MuLawDecoding(AudioTensorOperation):
    """
    Decode mu-law encoded signal, refer to `mu-law algorithm <https://en.wikipedia.org/wiki/M-law_algorithm>`_ .
--- a/mindspore/python/mindspore/dataset/audio/validators.py
+++ b/mindspore/python/mindspore/dataset/audio/validators.py
@ -1006,3 +1006,61 @@ def check_lfcc(method):
        return method(self, *args, **kwargs)

    return new_method
+
+
+def check_mfcc(method):
+    """Wrapper method to check the parameters of MFCC."""
+
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [sample_rate, n_mfcc, dct_type, norm, log_mels, melkwargs], _ = parse_user_args(method, *args, **kwargs)
+        check_non_negative_int32(sample_rate, "sample_rate")
+        type_check(log_mels, (bool,), "log_mels")
+        type_check(norm, (NormMode,), "norm")
+        check_non_negative_int32(n_mfcc, "n_mfcc")
+        if dct_type != 2:
+            raise ValueError("Input dct_type must be 2, but got : {0}.".format(dct_type))
+
+        if melkwargs is not None:
+            type_check(melkwargs, (dict,), "melkwargs")
+            n_fft = melkwargs["n_fft"]
+            win_length = melkwargs["win_length"]
+            hop_length = melkwargs["hop_length"]
+            f_min = melkwargs["f_min"]
+            f_max = melkwargs["f_max"]
+            pad = melkwargs["pad"]
+            power = melkwargs["power"]
+            normalized = melkwargs["normalized"]
+            center = melkwargs["center"]
+            onesided = melkwargs["onesided"]
+            window = melkwargs["window"]
+            pad_mode = melkwargs["pad_mode"]
+            norm_mel = melkwargs["norm"]
+            mel_scale = melkwargs["mel_scale"]
+            n_mels = melkwargs["n_mels"]
+
+            check_pos_int32(n_fft, "n_fft")
+            check_mel_scale_n_mels(n_mels)
+            check_mel_scale_freq(f_min, f_max, sample_rate)
+            check_mel_scale_norm(norm_mel)
+            check_mel_scale_mel_type(mel_scale)
+            check_power(power)
+            type_check(window, (WindowType,), "window")
+            type_check(normalized, (bool,), "normalized")
+            type_check(center, (bool,), "center")
+            type_check(pad_mode, (BorderType,), "pad_mode")
+            type_check(onesided, (bool,), "onesided")
+            check_non_negative_int32(pad, "pad")
+            if hop_length is not None:
+                check_pos_int32(hop_length, "hop_length")
+            if f_max is not None:
+                check_non_negative_float32(f_max, "f_max")
+            if win_length is not None:
+                check_non_negative_int32(win_length, "win_length")
+            if n_mels < n_mfcc:
+                raise ValueError("Input n_mels should be greater than or equal to n_mfcc, but got n_mfcc: {0} and " \
+                                 "n_mels: {1}.".format(n_mfcc, n_mels))
+
+        return method(self, *args, **kwargs)
+
+    return new_method
--- a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc
+++ b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc
@ -3148,3 +3148,56 @@ TEST_F(MindDataTestPipeline, TestLFCCWrongArgs) {
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
 }
+
+/// Feature: MFCC op
+/// Description: Test pipeline for MFCC op
+/// Expectation: Generate expected output after cases were executed
+TEST_F(MindDataTestPipeline, TestMFCCPipeline) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMFCCPipeline.";
+  // Original waveform
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {1, 1, 300}));
+  std::shared_ptr<Dataset> ds = RandomData(10, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+  auto mfcc_op1 = audio::MFCC(16000, 40, 2, NormMode::kOrtho, true);
+  ds = ds->Map({mfcc_op1});
+  EXPECT_NE(ds, nullptr);
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  // Check the iterator that was just created, not the dataset again.
+  EXPECT_NE(iter, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+  // Every row is expected to come out as <1, 1, n_mfcc, frames>.
+  std::vector<int64_t> expected = {1, 1, 40, 2};
+  int i = 0;
+  while (row.size() != 0) {
+    auto col = row["waveform"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 4);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 10);
+  iter->Stop();
+}
+
+/// Feature: MFCC op
+/// Description: Test wrong arguments for MFCC op
+/// Expectation: Error message is logged, and CreateIterator() for invalid pipeline returns nullptr
+TEST_F(MindDataTestPipeline, TestMFCCWrongArgs) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMFCCWrongArgs.";
+  // MFCC: negative sample_rate.
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {1, 1, 300}));
+  std::shared_ptr<Dataset> ds = RandomData(10, schema);
+  EXPECT_NE(ds, nullptr);
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+  auto mfcc_op0 = audio::MFCC(-1);
+  ds = ds->Map({mfcc_op0});
+  EXPECT_NE(ds, nullptr);
+  // The invalid argument is expected to surface when the pipeline is built, so no iterator is returned.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_EQ(iter, nullptr);
+}
--- a/tests/ut/cpp/dataset/execute_test.cc
+++ b/tests/ut/cpp/dataset/execute_test.cc
@ -3053,3 +3053,22 @@ TEST_F(MindDataTestExecute, TestTruncateOpStr) {
  Status status = trans(input_ms, &input_ms);
  EXPECT_TRUE(status.IsOk());
 }
+
+/// Feature: MFCC op
+/// Description: Test basic usage of MFCC op
+/// Expectation: The data is processed successfully
+TEST_F(MindDataTestExecute, TestMFCCEager) {
+  MS_LOG(INFO) << "Doing MindDataTestExecute-TestMFCCEager.";
+  // Original waveform: 30 samples laid out as <1, 1, 30>.
+  std::vector<float> waveform = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2,
+                                 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(waveform, TensorShape({1, 1, 30}), &input));
+  auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  std::shared_ptr<TensorTransform> mfcc_op =
+    std::make_shared<audio::MFCC>(16000, 4, 2, NormMode::kOrtho, true, 10);
+  // apply MFCC
+  mindspore::dataset::Execute trans({mfcc_op});
+  Status status = trans(input_ms, &input_ms);
+  EXPECT_TRUE(status.IsOk());
+}
--- a/tests/ut/python/dataset/test_mfcc_ops.py
+++ b/tests/ut/python/dataset/test_mfcc_ops.py
@ -0,0 +1,281 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+Testing MFCC Python API
+"""
+import numpy as np
+
+import mindspore.dataset as ds
+import mindspore.dataset.audio as audio
+from mindspore import log as logger
+from mindspore.dataset.audio.utils import WindowType, BorderType, MelType, NormType, NormMode
+
+
+def count_unequal_element(data_expected, data_me, rtol, atol):
+    """ Precision calculation func """
+    assert data_expected.shape == data_me.shape
+    total_count = len(data_expected.flatten())
+    error = np.abs(data_expected - data_me)
+    greater = np.greater(error, atol + np.abs(data_expected) * rtol)
+    loss_count = np.count_nonzero(greater)
+    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
+        data_expected[greater], data_me[greater], error[greater])
+
+
+def test_mfcc_pipeline():
+    """
+    Feature: Mindspore pipeline mode normal testcase: mfcc op
+    Description: Input audio signal to test pipeline
+    Expectation: Generate expected output after cases were executed
+    """
+    logger.info("test_mfcc_pipeline")
+
+    wav = [[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]]]
+    dataset = ds.NumpySlicesDataset(wav, column_names=["audio"], shuffle=False)
+    out = audio.MFCC(sample_rate=16000, n_mfcc=4, dct_type=2, norm=NormMode.ORTHO, log_mels=True,
+                     melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0,
+                                "f_max": 10000.0, "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0,
+                                "normalized": False, "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    dataset = dataset.map(operations=out, input_columns=["audio"], output_columns=["MFCC"])
+    result = np.array([[[2.7625, 5.6919, 3.6229, 3.9756],
+                        [0.8142, 3.2698, 1.4946, 3.0683],
+                        [-1.6855, -0.8312, -1.1395, 0.0481],
+                        [-2.1808, -2.5489, -2.3110, -3.1485]]])
+    for data1 in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
+        count_unequal_element(data1["MFCC"], result, 0.0001, 0.0001)
+
+
+def test_mfcc_eager():
+    """
+    Feature: Mindspore eager mode normal testcase: mfcc op
+    Description: Input audio signal to test eager
+    Expectation: Generate expected output after cases were executed
+    """
+    logger.info("test_mfcc_eager")
+    wav = np.array([[[1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]]])
+    out = audio.MFCC(sample_rate=16000, n_mfcc=4, dct_type=2, norm=NormMode.ORTHO, log_mels=True,
+                     melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": False,
+                                "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                "norm": NormType.NONE, "mel_scale": MelType.HTK})(wav)
+    result = np.array([[[[2.7625, 5.6919, 3.6229, 3.9756],
+                         [0.8142, 3.2698, 1.4946, 3.0683],
+                         [-1.6855, -0.8312, -1.1395, 0.0481],
+                         [-2.1808, -2.5489, -2.3110, -3.1485]]]])
+    count_unequal_element(out, result, 0.0001, 0.0001)
+
+
+def test_mfcc_param():
+    """
+    Feature: Test mfcc invalid parameter.
+    Description: Test some invalid parameters.
+    Expectation: throw ValueError, TypeError or RuntimeError exception.
+    """
+    try:
+        _ = audio.MFCC(sample_rate=-1)
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input sample_rate is not within the required interval of [0, 2147483647]." in str(error)
+    try:
+        _ = audio.MFCC(log_mels=-1)
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument log_mels with value -1 is not of type [<class 'bool'>], but got <class 'int'>." in str(error)
+    try:
+        _ = audio.MFCC(norm="Karl Marx")
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument norm with value Karl Marx is not of type [<enum 'NormMode'>], but got <class 'str'>." \
+        in str(error)
+    try:
+        _ = audio.MFCC(dct_type=-1)
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "dct_type must be 2, but got : -1." in str(error)
+    try:
+        _ = audio.MFCC(sample_rate=-1)
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input sample_rate is not within the required interval of [0, 2147483647]." in str(error)
+    try:
+        _ = audio.MFCC(sample_rate="s")
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument sample_rate with value s is not of type [<class 'int'>], but got <class 'str'>." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": -1,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input f_max is not within the required interval of (0, 16777216]." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": -1, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input n_mels should be greater than or equal to n_mfcc, but got n_mfcc: 40 and n_mels: 5." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": -1, "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument norm with value -1 is not of type [<enum 'NormType'>], but got <class 'int'>." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": -1})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument mel_type with value -1 is not of type [<enum 'MelType'>], but got <class 'int'>." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": -1, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input n_fft is not within the required interval of [1, 2147483647]." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 0, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input n_fft is not within the required interval of [1, 2147483647]." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 0, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 50, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input win_length is not within the required interval of [0, 2147483647]." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": "s", "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument win_length with value s is not of type [<class 'int'>], but got <class 'str'>." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": -1, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input hop_length is not within the required interval of [1, 2147483647]." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 200, "win_length": 300, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 50, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input win_length should be no more than n_fft, but got win_length: 300 and n_fft: 200." \
+               in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": -1, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input pad is not within the required interval of [0, 2147483647]." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": -1, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except ValueError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Input power is not within the required interval of [0, 16777216]." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": "XiaDanni", "win_length": 16, "hop_length": 8, "f_min": 0.0,
+                                  "f_max": 10000.0, "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0,
+                                  "normalized": True, "center": True, "pad_mode": BorderType.REFLECT,
+                                  "onesided": True, "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument n_fft with value XiaDanni is not of type [<class 'int'>], but got <class 'str'>." \
+               in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": False, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": True,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument window with value False is not of type [<enum 'WindowType'>], but got <class 'bool'>." \
+               in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": False, "onesided": True, "norm": NormType.NONE,
+                                  "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument pad_mode with value False is not of type [<enum 'BorderType'>], but got <class 'bool'>." \
+               in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": "LianLinghang",
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument onesided with value LianLinghang is not of type [<class 'bool'>], but got <class 'str'>." \
+               in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": True,
+                                  "center": "XiaDanni", "pad_mode": BorderType.REFLECT, "onesided": False,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument center with value XiaDanni is not of type [<class 'bool'>], but got <class 'str'>." \
+               in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": "s",
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": False,
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument normalized with value s is not of type [<class 'bool'>], but got <class 'str'>." in str(error)
+    try:
+        _ = audio.MFCC(melkwargs={"n_fft": 16, "win_length": 16, "hop_length": 8, "f_min": 0.0, "f_max": 10000.0,
+                                  "pad": 0, "n_mels": 5, "window": WindowType.HANN, "power": 2.0, "normalized": 1,
+                                  "center": True, "pad_mode": BorderType.REFLECT, "onesided": "LianLinghang",
+                                  "norm": NormType.NONE, "mel_scale": MelType.HTK})
+    except TypeError as error:
+        logger.info("Got an exception in MFCC: {}".format(str(error)))
+        assert "Argument normalized with value 1 is not of type [<class 'bool'>], but got <class 'int'>." in str(error)
+
+
+if __name__ == '__main__':
+    # Script entry point: run every MFCC test in this module directly, in a
+    # fixed order. An exception raised by one test propagates and prevents
+    # the remaining ones from running.
+    for mfcc_case in (test_mfcc_pipeline, test_mfcc_eager, test_mfcc_param):
+        mfcc_case()