forked from mindspore-Ecosystem/mindspore
!33270 [assistant][InverseMelScale]
Merge pull request !33270 from chenchen/InverseMelScale
This commit is contained in:
commit
fcb0319747
|
@ -39,6 +39,7 @@
|
|||
#include "minddata/dataset/audio/ir/kernels/gain_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/griffin_lim_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/magphase_ir.h"
|
||||
|
@ -464,6 +465,47 @@ std::shared_ptr<TensorOperation> HighpassBiquad::Parse() {
|
|||
return std::make_shared<HighpassBiquadOperation>(data_->sample_rate_, data_->cutoff_freq_, data_->Q_);
|
||||
}
|
||||
|
||||
// InverseMelScale Transform Operation.
|
||||
// Plain parameter bundle for the public InverseMelScale transform. The values are stored untouched and are
// forwarded to InverseMelScaleOperation when Parse() runs.
struct InverseMelScale::Data {
  Data(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
       float tolerance_loss, float tolerance_change, const std::map<std::string, float> &sgdargs, NormType norm,
       MelType mel_type)
      : n_stft_{n_stft},
        n_mels_{n_mels},
        sample_rate_{sample_rate},
        f_min_{f_min},
        f_max_{f_max},
        max_iter_{max_iter},
        tolerance_loss_{tolerance_loss},
        tolerance_change_{tolerance_change},
        sgdargs_(sgdargs),  // parentheses: plain copy of the caller's map
        norm_{norm},
        mel_type_{mel_type} {}

  // Spectrogram geometry.
  int32_t n_stft_;
  int32_t n_mels_;
  int32_t sample_rate_;
  // Frequency range of the mel filter bank.
  float f_min_;
  float f_max_;
  // Stopping criteria of the optimisation.
  int32_t max_iter_;
  float tolerance_loss_;
  float tolerance_change_;
  // Optimiser settings keyed by name ("sgd_lr", "sgd_momentum").
  std::map<std::string, float> sgdargs_;
  // Filter bank options.
  NormType norm_;
  MelType mel_type_;
};
|
||||
|
||||
// Stores all arguments in a shared Data instance; nothing is validated at construction time.
InverseMelScale::InverseMelScale(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max,
                                 int32_t max_iter, float tolerance_loss, float tolerance_change,
                                 const std::map<std::string, float> &sgdargs, NormType norm, MelType mel_type) {
  data_ = std::make_shared<Data>(n_stft, n_mels, sample_rate, f_min, f_max, max_iter, tolerance_loss,
                                 tolerance_change, sgdargs, norm, mel_type);
}
|
||||
|
||||
std::shared_ptr<TensorOperation> InverseMelScale::Parse() {
|
||||
return std::make_shared<InverseMelScaleOperation>(
|
||||
data_->n_stft_, data_->n_mels_, data_->sample_rate_, data_->f_min_, data_->f_max_, data_->max_iter_,
|
||||
data_->tolerance_loss_, data_->tolerance_change_, data_->sgdargs_, data_->norm_, data_->mel_type_);
|
||||
}
|
||||
|
||||
// LFilter Transform Operation.
|
||||
struct LFilter::Data {
|
||||
Data(const std::vector<float> &a_coeffs, const std::vector<float> &b_coeffs, bool clamp)
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include "minddata/dataset/audio/ir/kernels/gain_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/griffin_lim_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/highpass_biquad_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/lfilter_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/lowpass_biquad_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/magphase_ir.h"
|
||||
|
@ -359,6 +360,20 @@ PYBIND_REGISTER(
|
|||
}));
|
||||
}));
|
||||
|
||||
// Exposes audio::InverseMelScaleOperation to Python. The factory converts the Python dict of SGD arguments
// to a string->float map and validates the parameters before the object is handed back.
PYBIND_REGISTER(InverseMelScaleOperation, 1, ([](const py::module *m) {
                  using OperationT = audio::InverseMelScaleOperation;
                  (void)py::class_<OperationT, TensorOperation, std::shared_ptr<OperationT>>(
                    *m, "InverseMelScaleOperation")
                    .def(py::init([](int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max,
                                     int32_t max_iter, float tolerance_loss, float tolerance_change,
                                     const py::dict &sgdargs, NormType norm, MelType mel_type) {
                      auto operation = std::make_shared<OperationT>(n_stft, n_mels, sample_rate, f_min, f_max,
                                                                    max_iter, tolerance_loss, tolerance_change,
                                                                    toStringFloatMap(sgdargs), norm, mel_type);
                      THROW_IF_ERROR(operation->ValidateParams());
                      return operation;
                    }));
                }));
|
||||
|
||||
PYBIND_REGISTER(LFilterOperation, 1, ([](const py::module *m) {
|
||||
(void)py::class_<audio::LFilterOperation, TensorOperation, std::shared_ptr<audio::LFilterOperation>>(
|
||||
*m, "LFilterOperation")
|
||||
|
|
|
@ -49,6 +49,16 @@ std::map<std::string, int32_t> toStringMap(const py::dict dict) {
|
|||
return map;
|
||||
}
|
||||
|
||||
// Builds a std::map<std::string, float> from a Python dict, converting each key with toString and each
// value with toFloat. An empty dict yields an empty map.
std::map<std::string, float> toStringFloatMap(const py::dict dict) {
  std::map<std::string, float> result;
  if (dict.empty()) {
    return result;
  }
  for (auto item : dict) {
    (void)result.emplace(toString(item.first), toFloat(item.second));
  }
  return result;
}
|
||||
|
||||
std::vector<std::string> toStringVector(const py::list list) {
|
||||
std::vector<std::string> vector;
|
||||
if (!list.empty()) {
|
||||
|
|
|
@ -55,6 +55,8 @@ std::set<std::string> toStringSet(const py::list list);
|
|||
|
||||
std::map<std::string, int32_t> toStringMap(const py::dict dict);
|
||||
|
||||
std::map<std::string, float> toStringFloatMap(const py::dict dict);
|
||||
|
||||
std::vector<std::string> toStringVector(const py::list list);
|
||||
|
||||
std::vector<pid_t> toIntVector(const py::list input_list);
|
||||
|
|
|
@ -25,6 +25,7 @@ add_library(audio-ir-kernels OBJECT
|
|||
gain_ir.cc
|
||||
griffin_lim_ir.cc
|
||||
highpass_biquad_ir.cc
|
||||
inverse_mel_scale_ir.cc
|
||||
lfilter_ir.cc
|
||||
lowpass_biquad_ir.cc
|
||||
magphase_ir.cc
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "minddata/dataset/audio/ir/kernels/inverse_mel_scale_ir.h"
|
||||
|
||||
#include "minddata/dataset/audio/ir/validators.h"
|
||||
#include "minddata/dataset/audio/kernels/inverse_mel_scale_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace audio {
|
||||
// InverseMelScale
|
||||
InverseMelScaleOperation::InverseMelScaleOperation(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min,
|
||||
float f_max, int32_t max_iter, float tolerance_loss,
|
||||
float tolerance_change, const std::map<std::string, float> &sgdargs,
|
||||
NormType norm, MelType mel_type)
|
||||
: n_stft_(n_stft),
|
||||
n_mels_(n_mels),
|
||||
sample_rate_(sample_rate),
|
||||
f_min_(f_min),
|
||||
f_max_(f_max),
|
||||
max_iter_(max_iter),
|
||||
tolerance_loss_(tolerance_loss),
|
||||
tolerance_change_(tolerance_change),
|
||||
sgdargs_(sgdargs),
|
||||
norm_(norm),
|
||||
mel_type_(mel_type) {
|
||||
sgd_lr_ = sgdargs_.find("sgd_lr") == sgdargs_.end() ? 0.1 : sgdargs_["sgd_lr"];
|
||||
constexpr float SGD_MOMENTUM_DEFAULT = 0.9;
|
||||
sgd_momentum_ = sgdargs_.find("sgd_momentum") == sgdargs_.end() ? SGD_MOMENTUM_DEFAULT : sgdargs_["sgd_momentum"];
|
||||
}
|
||||
|
||||
InverseMelScaleOperation::~InverseMelScaleOperation() = default;
|
||||
|
||||
std::string InverseMelScaleOperation::Name() const { return kInverseMelScaleOperation; }
|
||||
|
||||
// Checks the user-supplied parameters before the kernel is built.
// \return Status::OK() when every check passes, otherwise the first failing check's error status.
Status InverseMelScaleOperation::ValidateParams() {
  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("InverseMelScale", "n_mels", n_mels_));
  RETURN_IF_NOT_OK(ValidateIntScalarNonNegative("InverseMelScale", "sample_rate", sample_rate_));
  CHECK_FAIL_RETURN_UNEXPECTED(n_stft_ != 1,
                               "InverseMelScale: n_stft can not be equal to 1, but got: " + std::to_string(n_stft_));
  RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("InverseMelScale", "f_max", f_max_));

  // f_max == 0 is the documented "use sample_rate / 2" sentinel and the kernel resolves it the same way.
  // Compare f_min against the resolved value so that the API defaults (f_min = 0, f_max = 0) are accepted.
  // sample_rate_ is known to be non-negative here, so integer division equals floor(sample_rate / 2).
  const float effective_f_max = (f_max_ == 0) ? static_cast<float>(sample_rate_ / 2) : f_max_;
  CHECK_FAIL_RETURN_UNEXPECTED(f_min_ < effective_f_max, "InverseMelScale: f_max must be greater than f_min.");

  // SGD params
  RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("InverseMelScale", "sgd_lr", sgd_lr_));
  RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("InverseMelScale", "sgd_momentum", sgd_momentum_));
  return Status::OK();
}
|
||||
|
||||
std::shared_ptr<TensorOp> InverseMelScaleOperation::Build() {
|
||||
std::shared_ptr<InverseMelScaleOp> tensor_op =
|
||||
std::make_shared<InverseMelScaleOp>(n_stft_, n_mels_, sample_rate_, f_min_, f_max_, max_iter_, tolerance_loss_,
|
||||
tolerance_change_, sgd_lr_, sgd_momentum_, norm_, mel_type_);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// Serialises the constructor arguments into *out_json, one key per argument. The resolved sgd_lr_ and
// sgd_momentum_ values are not written separately; the original sgdargs map is.
Status InverseMelScaleOperation::to_json(nlohmann::json *out_json) {
  nlohmann::json serialized;

  // spectrogram geometry
  serialized["n_stft"] = n_stft_;
  serialized["n_mels"] = n_mels_;
  serialized["sample_rate"] = sample_rate_;

  // mel frequency range
  serialized["f_min"] = f_min_;
  serialized["f_max"] = f_max_;

  // optimisation settings
  serialized["max_iter"] = max_iter_;
  serialized["tolerance_loss"] = tolerance_loss_;
  serialized["tolerance_change"] = tolerance_change_;
  serialized["sgdargs"] = sgdargs_;

  // filter bank options
  serialized["norm"] = norm_;
  serialized["mel_type"] = mel_type_;

  *out_json = serialized;
  return Status::OK();
}
|
||||
} // namespace audio
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,67 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_INVERSE_MEL_SCALE_IR_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_INVERSE_MEL_SCALE_IR_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "include/api/status.h"
|
||||
#include "minddata/dataset/include/dataset/constants.h"
|
||||
#include "minddata/dataset/kernels/ir/tensor_operation.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace audio {
|
||||
constexpr char kInverseMelScaleOperation[] = "InverseMelScale";
|
||||
|
||||
class InverseMelScaleOperation : public TensorOperation {
|
||||
public:
|
||||
InverseMelScaleOperation(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max,
|
||||
int32_t max_iter, float tolerance_loss, float tolerance_change,
|
||||
const std::map<std::string, float> &sgdargs, NormType norm, MelType mel_type);
|
||||
|
||||
~InverseMelScaleOperation();
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override;
|
||||
|
||||
Status to_json(nlohmann::json *out_json) override;
|
||||
|
||||
private:
|
||||
int32_t n_stft_;
|
||||
int32_t n_mels_;
|
||||
int32_t sample_rate_;
|
||||
float f_min_;
|
||||
float f_max_;
|
||||
int32_t max_iter_;
|
||||
float tolerance_loss_;
|
||||
float tolerance_change_;
|
||||
std::map<std::string, float> sgdargs_;
|
||||
float sgd_lr_;
|
||||
float sgd_momentum_;
|
||||
NormType norm_;
|
||||
MelType mel_type_;
|
||||
};
|
||||
} // namespace audio
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_INVERSE_MEL_SCALE_IR_H_
|
|
@ -26,6 +26,7 @@ add_library(audio-kernels OBJECT
|
|||
gain_op.cc
|
||||
griffin_lim_op.cc
|
||||
highpass_biquad_op.cc
|
||||
inverse_mel_scale_op.cc
|
||||
lfilter_op.cc
|
||||
lowpass_biquad_op.cc
|
||||
magphase_op.cc
|
||||
|
|
|
@ -2050,5 +2050,106 @@ Status GriffinLim(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor>
|
|||
momentum, length, rand_init, rnd);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Status InverseMelScaleImpl(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t n_stft,
|
||||
int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
|
||||
float tolerance_loss, float tolerance_change, float sgd_lr, float sgd_momentum,
|
||||
NormType norm, MelType mel_type, std::mt19937 rnd) {
|
||||
f_max = f_max == 0 ? static_cast<T>(std::floor(sample_rate / 2)) : f_max;
|
||||
// create fb mat <freq, n_mels>
|
||||
std::shared_ptr<Tensor> freq_bin_mat;
|
||||
RETURN_IF_NOT_OK(CreateFbanks(&freq_bin_mat, n_stft, f_min, f_max, n_mels, sample_rate, norm, mel_type));
|
||||
|
||||
auto fb_ptr = &*freq_bin_mat->begin<float>();
|
||||
Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>> matrix_fb(fb_ptr, n_mels, n_stft);
|
||||
// pack melspec <n, n_mels, time>
|
||||
TensorShape input_shape = input->shape();
|
||||
TensorShape input_reshape({input->Size() / input_shape[-1] / input_shape[-2], input_shape[-2], input_shape[-1]});
|
||||
RETURN_IF_NOT_OK(input->Reshape(input_reshape));
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(n_mels == input_shape[-1 * TWO],
|
||||
"InverseMelScale: n_mels must be equal to the penultimate dimension of input.");
|
||||
|
||||
int time = input_shape[-1];
|
||||
int freq = matrix_fb.cols();
|
||||
// input matrix 3d
|
||||
std::vector<T> specgram;
|
||||
// engine for random matrix
|
||||
std::uniform_real_distribution<T> dist(0, 1);
|
||||
for (int channel = 0; channel < input_reshape[0]; channel++) {
|
||||
// slice by first dimension
|
||||
auto data_ptr = &*input->begin<T>();
|
||||
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> input_channel(data_ptr + time * n_mels * channel, time,
|
||||
n_mels);
|
||||
// init specgram at n=channel
|
||||
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> mat_channel =
|
||||
Eigen::MatrixXd::Zero(time, freq).unaryExpr([&rnd, &dist](double dummy) { return dist(rnd); });
|
||||
std::vector<T> vec_channel(mat_channel.data(), mat_channel.data() + mat_channel.size());
|
||||
std::shared_ptr<Tensor> param_channel;
|
||||
TensorShape output_shape = TensorShape({freq, time});
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(vec_channel, TensorShape({freq * time}), ¶m_channel));
|
||||
// sgd
|
||||
T loss = std::numeric_limits<T>::max();
|
||||
for (int epoch = 0; epoch < max_iter; epoch++) {
|
||||
auto pred = mat_channel * (matrix_fb.transpose().template cast<T>());
|
||||
// cal loss with pred and gt
|
||||
auto diff = input_channel - pred;
|
||||
T new_loss = diff.array().square().mean();
|
||||
// cal grad
|
||||
auto grad = diff * (matrix_fb.template cast<T>()) * (-1) / time;
|
||||
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> mat_grad = grad;
|
||||
std::vector<T> vec_grad(mat_grad.data(), mat_grad.data() + mat_grad.size());
|
||||
std::shared_ptr<Tensor> tensor_grad;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(vec_grad, TensorShape({grad.size()}), &tensor_grad));
|
||||
|
||||
std::shared_ptr<Tensor> nspec;
|
||||
RETURN_IF_NOT_OK(SGD<T>(param_channel, &nspec, tensor_grad, sgd_lr, sgd_momentum));
|
||||
|
||||
T diff_loss = std::abs(loss - new_loss);
|
||||
if ((new_loss < tolerance_loss) || (diff_loss < tolerance_change)) {
|
||||
break;
|
||||
}
|
||||
loss = new_loss;
|
||||
data_ptr = &*nspec->begin<T>();
|
||||
mat_channel = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>(data_ptr, time, freq);
|
||||
// use new mat_channel to update param_channel
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromTensor(nspec, ¶m_channel));
|
||||
}
|
||||
// clamp and transpose
|
||||
auto res = mat_channel.cwiseMax(0);
|
||||
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> mat_res = res;
|
||||
std::vector<T> spec_channel(mat_res.data(), mat_res.data() + mat_res.size());
|
||||
specgram.insert(specgram.end(), spec_channel.begin(), spec_channel.end());
|
||||
}
|
||||
std::shared_ptr<Tensor> final_out;
|
||||
if (input_shape.Size() > TWO) {
|
||||
std::vector<int64_t> out_shape_vec = input_shape.AsVector();
|
||||
out_shape_vec[input_shape.Size() - 1] = time;
|
||||
out_shape_vec[input_shape.Size() - TWO] = freq;
|
||||
TensorShape output_shape(out_shape_vec);
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(specgram, output_shape, &final_out));
|
||||
} else {
|
||||
TensorShape output_shape = TensorShape({input_reshape[0], freq, time});
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(specgram, output_shape, &final_out));
|
||||
}
|
||||
*output = final_out;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Dispatches to the typed implementation: float64 input is processed as double, every other input type is
// first cast to float32.
Status InverseMelScale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t n_stft,
                       int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
                       float tolerance_loss, float tolerance_change, float sgd_lr, float sgd_momentum, NormType norm,
                       MelType mel_type, std::mt19937 rnd) {
  if (input->type() != DataType::DE_FLOAT64) {
    std::shared_ptr<Tensor> float_input;
    RETURN_IF_NOT_OK(TypeCast(input, &float_input, DataType(DataType::DE_FLOAT32)));
    return InverseMelScaleImpl<float>(float_input, output, n_stft, n_mels, sample_rate, f_min, f_max, max_iter,
                                      tolerance_loss, tolerance_change, sgd_lr, sgd_momentum, norm, mel_type, rnd);
  }

  std::shared_ptr<Tensor> double_input = input;
  return InverseMelScaleImpl<double>(double_input, output, n_stft, n_mels, sample_rate, f_min, f_max, max_iter,
                                     tolerance_loss, tolerance_change, sgd_lr, sgd_momentum, norm, mel_type, rnd);
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -501,6 +501,80 @@ Status Dct(std::shared_ptr<Tensor> *output, int32_t n_mfcc, int32_t n_mels, Norm
|
|||
/// \return Status code.
|
||||
Status ComplexNorm(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float power);
|
||||
|
||||
/// \brief Stochastic gradient descent.
|
||||
/// \param[in] input Input tensor.
|
||||
/// \param[out] output Output tensor.
|
||||
/// \param[in] grad Input grad for params.
|
||||
/// \param[in] lr Learning rate.
|
||||
/// \param[in] momentum Momentum factor.
|
||||
/// \param[in] dampening Dampening for momentum.
|
||||
/// \param[in] weight_decay Weight decay.
|
||||
/// \param[in] nesterov Whether enable nesterov momentum.
|
||||
/// \param[in] stat Stat.
|
||||
/// \return Status code.
|
||||
template <typename T>
|
||||
Status SGD(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::shared_ptr<Tensor> &grad,
|
||||
float lr, float momentum = 0.0, float dampening = 0.0, float weight_decay = 0.0, bool nesterov = false,
|
||||
float stat = 0.0) {
|
||||
size_t elem_num = input->Size();
|
||||
std::vector<T> accum(elem_num);
|
||||
std::shared_ptr<Tensor> output_param;
|
||||
std::vector<T> out_param(elem_num);
|
||||
int ind = 0;
|
||||
auto itr_inp = input->begin<T>();
|
||||
auto itr_grad = grad->begin<T>();
|
||||
while (itr_inp != input->end<T>() && itr_grad != grad->end<T>()) {
|
||||
T grad_new = (*itr_grad);
|
||||
if (weight_decay > static_cast<float>(0.0)) {
|
||||
grad_new += (*itr_inp) * static_cast<T>(weight_decay);
|
||||
}
|
||||
if (momentum > 0) {
|
||||
if (stat > 0) {
|
||||
accum[ind] = grad_new;
|
||||
stat = 0;
|
||||
} else {
|
||||
accum[ind] = accum[ind] * momentum + (1 - static_cast<T>(dampening)) * grad_new;
|
||||
}
|
||||
if (nesterov) {
|
||||
grad_new += accum[ind] * momentum;
|
||||
} else {
|
||||
grad_new = accum[ind];
|
||||
}
|
||||
}
|
||||
out_param[ind] = (*itr_inp) - lr * grad_new;
|
||||
itr_inp++;
|
||||
itr_grad++;
|
||||
ind++;
|
||||
}
|
||||
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(out_param, TensorShape({input->Size()}), &output_param));
|
||||
*output = output_param;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
/// \brief Use conversion matrix to solve normal STFT from mel frequency STFT.
|
||||
/// \param input Tensor of shape <..., n_mels, time>.
|
||||
/// \param output Tensor of shape <..., freq, time>.
|
||||
/// \param n_stft Number of bins in STFT, the value must be greater than 0.
|
||||
/// \param n_mels Number of mel filter, the value must be greater than 0.
|
||||
/// \param sample_rate Sample rate of the signal, the value can't be zero.
|
||||
/// \param f_min Minimum frequency, the value must be greater than or equal to 0.
|
||||
/// \param f_max Maximum frequency, the value must be greater than or equal to 0. If 0, sample_rate / 2 is used.
|
||||
/// \param max_iter Maximum number of optimization iterations, the value must be greater than 0.
|
||||
/// \param tolerance_loss Value of loss to stop optimization at, the value must be greater than or equal to 0.
|
||||
/// \param tolerance_change Difference in losses to stop optimization at, the value must be greater than or equal to 0.
|
||||
/// \param sgd_lr Learning rate for SGD optimizer, the value must be greater than or equal to 0.
|
||||
/// \param sgd_momentum Momentum factor for SGD optimizer, the value must be greater than or equal to 0.
|
||||
/// \param norm Type of norm, value should be NormType::kSlaney or NormType::kNone. If norm is NormType::kSlaney,
|
||||
/// divide the triangle mel weight by the width of the mel band.
|
||||
/// \param mel_type Type of mel, value should be MelType::kHtk or MelType::kSlaney.
|
||||
/// \param rnd Random generator.
|
||||
/// \return Status code.
|
||||
Status InverseMelScale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t n_stft,
|
||||
int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
|
||||
float tolerance_loss, float tolerance_change, float sgd_lr, float sgd_momentum, NormType norm,
|
||||
MelType mel_type, std::mt19937 rnd);
|
||||
|
||||
/// \brief Decode mu-law encoded signal.
|
||||
/// \param input Tensor of shape <..., time>.
|
||||
/// \param output Tensor of shape <..., time>.
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "minddata/dataset/audio/kernels/inverse_mel_scale_op.h"
|
||||
|
||||
#include "minddata/dataset/audio/kernels/audio_utils.h"
|
||||
#include "minddata/dataset/kernels/data/data_utils.h"
|
||||
#include "minddata/dataset/util/status.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
// Validates the input tensor and runs the InverseMelScale kernel with the parameters stored in this op.
Status InverseMelScaleOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
  // pointer sanity checks
  IO_CHECK(input, output);
  // rank check against kDefaultAudioDim
  RETURN_IF_NOT_OK(ValidateLowRank("InverseMelScale", input, kDefaultAudioDim, "<..., freq, time>"));
  // element type must be numeric
  RETURN_IF_NOT_OK(ValidateTensorNumeric("InverseMelScale", input));

  Status rc = InverseMelScale(input, output, n_stft_, n_mels_, sample_rate_, f_min_, f_max_, max_iter_,
                              tolerance_loss_, tolerance_change_, sgd_lr_, sgd_momentum_, norm_, mel_type_, rnd_);
  return rc;
}
|
||||
|
||||
// Reports the shape produced by Compute: the penultimate (n_mels) dimension is replaced by n_stft_, and a
// 2-D input additionally gains a leading dimension of 1, matching the kernel's <1, freq, time> output.
// Returns an error for inputs of rank below 2.
Status InverseMelScaleOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) {
  RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
  outputs.clear();
  constexpr size_t kMinRank = 2;
  auto out_dims = inputs[0].AsVector();
  if (out_dims.size() < kMinRank) {
    return Status(StatusCode::kMDUnexpectedError, "InverseMelScale: invalid input shape.");
  }
  // <..., n_mels, time> -> <..., n_stft, time>
  out_dims[out_dims.size() - kMinRank] = n_stft_;
  if (out_dims.size() == kMinRank) {
    // the kernel keeps the packed channel dimension for 2-D input
    out_dims.insert(out_dims.begin(), 1);
  }
  outputs.emplace_back(TensorShape(out_dims));
  return Status::OK();
}
|
||||
|
||||
// Reports the element type of the result: float64 input stays float64, any other numeric input becomes
// float32. Non-numeric input types are rejected.
Status InverseMelScaleOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) {
  RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs));
  RETURN_IF_NOT_OK(
    ValidateTensorType("InverseMelScale", inputs[0].IsNumeric(), "[int, float, double]", inputs[0].ToString()));
  const bool is_double = (inputs[0] == DataType(DataType::DE_FLOAT64));
  outputs[0] = DataType(is_double ? DataType::DE_FLOAT64 : DataType::DE_FLOAT32);
  return Status::OK();
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,79 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_INVERSE_MEL_SCALE_OP_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_INVERSE_MEL_SCALE_OP_H_
|
||||
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "include/dataset/constants.h"
|
||||
#include "minddata/dataset/core/tensor.h"
|
||||
#include "minddata/dataset/kernels/tensor_op.h"
|
||||
#include "minddata/dataset/util/random.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
class InverseMelScaleOp : public TensorOp {
|
||||
public:
|
||||
InverseMelScaleOp(int32_t n_stft, int32_t n_mels, int32_t sample_rate, float f_min, float f_max, int32_t max_iter,
|
||||
float tolerance_loss, float tolerance_change, float sgd_lr, float sgd_momentum, NormType norm,
|
||||
MelType mel_type)
|
||||
: n_stft_(n_stft),
|
||||
n_mels_(n_mels),
|
||||
sample_rate_(sample_rate),
|
||||
f_min_(f_min),
|
||||
f_max_(f_max),
|
||||
max_iter_(max_iter),
|
||||
tolerance_loss_(tolerance_loss),
|
||||
tolerance_change_(tolerance_change),
|
||||
sgd_lr_(sgd_lr),
|
||||
sgd_momentum_(sgd_momentum),
|
||||
norm_(norm),
|
||||
mel_type_(mel_type) {
|
||||
rnd_.seed(GetSeed());
|
||||
}
|
||||
|
||||
~InverseMelScaleOp() override = default;
|
||||
|
||||
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
|
||||
|
||||
std::string Name() const override { return kInverseMelScaleOp; }
|
||||
|
||||
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
|
||||
|
||||
Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
|
||||
|
||||
private:
|
||||
int32_t n_stft_;
|
||||
int32_t n_mels_;
|
||||
int32_t sample_rate_;
|
||||
float f_min_;
|
||||
float f_max_;
|
||||
int32_t max_iter_;
|
||||
float tolerance_loss_;
|
||||
float tolerance_change_;
|
||||
float sgd_lr_;
|
||||
float sgd_momentum_;
|
||||
NormType norm_;
|
||||
MelType mel_type_;
|
||||
std::mt19937 rnd_;
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_INVERSE_MEL_SCALE_OP_H_
|
|
@ -18,6 +18,7 @@
|
|||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
|
||||
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
@ -585,6 +586,43 @@ class MS_API HighpassBiquad final : public TensorTransform {
|
|||
std::shared_ptr<Data> data_;
|
||||
};
|
||||
|
||||
/// \brief InverseMelScale TensorTransform
|
||||
/// \notes Solve for a normal STFT from a mel frequency STFT, using a conversion matrix.
|
||||
class MS_API InverseMelScale final : public TensorTransform {
|
||||
public:
|
||||
/// \brief Constructor.
|
||||
/// \param[in] n_stft Number of bins in STFT, must be positive.
|
||||
/// \param[in] n_mels Number of mel filter, must be positive (Default: 128).
|
||||
/// \param[in] sample_rate Sample rate of the signal, the value can't be zero (Default: 16000).
|
||||
/// \param[in] f_min Minimum frequency, must be non-negative (Default: 0.0).
|
||||
/// \param[in] f_max Maximum frequency, must be non-negative (Default: 0.0, will be set to sample_rate / 2).
|
||||
/// \param[in] max_iter Maximum number of optimization iterations, must be positive (Default: 100000).
|
||||
/// \param[in] tolerance_loss Value of loss to stop optimization at, must be non-negative (Default: 1e-5).
|
||||
/// \param[in] tolerance_change Difference in losses to stop optimization at, must be non-negative (Default: 1e-8).
|
||||
/// \param[in] sgdargs Parameters of SGD optimizer, including lr, momentum
|
||||
/// (Default: {{"sgd_lr", 0.1}, {"sgd_momentum", 0.0}}).
|
||||
/// \param[in] norm Type of norm, value should be NormType::kSlaney or NormType::kNone. If norm is NormType::kSlaney,
|
||||
/// divide the triangle mel weight by the width of the mel band (Default: NormType::kNone).
|
||||
/// \param[in] mel_type Type of mel, value should be MelType::kHtk or MelType::kSlaney (Default: MelType::kHtk).
|
||||
explicit InverseMelScale(int32_t n_stft, int32_t n_mels = 128, int32_t sample_rate = 16000, float f_min = 0.0,
|
||||
float f_max = 0.0, int32_t max_iter = 100000, float tolerance_loss = 1e-5,
|
||||
float tolerance_change = 1e-8,
|
||||
const std::map<std::string, float> &sgdargs = {{"sgd_lr", 0.1}, {"sgd_momentum", 0.0}},
|
||||
NormType norm = NormType::kNone, MelType mel_type = MelType::kHtk);
|
||||
|
||||
/// \brief Destructor.
|
||||
~InverseMelScale() = default;
|
||||
|
||||
protected:
|
||||
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
||||
/// \return Shared pointer to TensorOperation object.
|
||||
std::shared_ptr<TensorOperation> Parse() override;
|
||||
|
||||
private:
|
||||
struct Data;
|
||||
std::shared_ptr<Data> data_;
|
||||
};
|
||||
|
||||
/// \brief Design filter. Similar to SoX implementation.
|
||||
class MS_API LFilter final : public TensorTransform {
|
||||
public:
|
||||
|
|
|
@ -170,6 +170,7 @@ constexpr char kFrequencyMaskingOp[] = "FrequencyMaskingOp";
|
|||
constexpr char kGainOp[] = "GainOp";
|
||||
constexpr char kGriffinLimOp[] = "GriffinLimOp";
|
||||
constexpr char kHighpassBiquadOp[] = "HighpassBiquadOp";
|
||||
constexpr char kInverseMelScaleOp[] = "InverseMelScaleOp";
|
||||
constexpr char kLFilterOp[] = "LFilterOp";
|
||||
constexpr char kLowpassBiquadOp[] = "LowpassBiquadOp";
|
||||
constexpr char kMagphaseOp[] = "MagphaseOp";
|
||||
|
|
|
@ -29,10 +29,10 @@ from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_
|
|||
check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_compute_deltas, \
|
||||
check_contrast, check_db_to_amplitude, check_dc_shift, check_deemph_biquad, check_detect_pitch_frequency, \
|
||||
check_dither, check_equalizer_biquad, check_fade, check_flanger, check_gain, check_griffin_lim, \
|
||||
check_highpass_biquad, check_lfilter, check_lowpass_biquad, check_magphase, check_mask_along_axis, \
|
||||
check_mask_along_axis_iid, check_masking, check_mel_scale, check_mu_law_coding, check_overdrive, \
|
||||
check_phase_vocoder, check_phaser, check_riaa_biquad, check_sliding_window_cmn, check_spectral_centroid, \
|
||||
check_spectrogram, check_time_stretch, check_treble_biquad, check_vol
|
||||
check_highpass_biquad, check_inverse_mel_scale, check_lfilter, check_lowpass_biquad, check_magphase, \
|
||||
check_mask_along_axis, check_mask_along_axis_iid, check_masking, check_mel_scale, check_mu_law_coding, \
|
||||
check_overdrive, check_phase_vocoder, check_phaser, check_riaa_biquad, check_sliding_window_cmn, \
|
||||
check_spectral_centroid, check_spectrogram, check_time_stretch, check_treble_biquad, check_vol
|
||||
|
||||
|
||||
class AudioTensorOperation(TensorOperation):
|
||||
|
@ -1010,6 +1010,58 @@ class HighpassBiquad(AudioTensorOperation):
|
|||
return cde.HighpassBiquadOperation(self.sample_rate, self.cutoff_freq, self.Q)
|
||||
|
||||
|
||||
class InverseMelScale(AudioTensorOperation):
    """
    Solve for a normal STFT from a mel frequency STFT, using a conversion matrix.

    Args:
        n_stft (int): Number of bins in STFT.
        n_mels (int, optional): Number of mel filterbanks (default=128).
        sample_rate (int, optional): Sample rate of audio signal (default=16000).
        f_min (float, optional): Minimum frequency (default=0.0).
        f_max (float, optional): Maximum frequency (default=None, will be set to sample_rate // 2).
        max_iter (int, optional): Maximum number of optimization iterations (default=100000).
        tolerance_loss (float, optional): Value of loss to stop optimization at (default=1e-5).
        tolerance_change (float, optional): Difference in losses to stop optimization at (default=1e-8).
        sgdargs (dict, optional): Arguments for the SGD optimizer (default=None, will be set to
            {'sgd_lr': 0.1, 'sgd_momentum': 0.9}).
        norm (NormType, optional): Normalization method, can be NormType.SLANEY or NormType.NONE
            (default=NormType.NONE).
        mel_type (MelType, optional): Mel scale to use, can be MelType.SLANEY or MelType.HTK (default=MelType.HTK).

    Examples:
        >>> import numpy as np
        >>>
        >>> waveform = np.random.randn(2, 2, 3, 2)
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
        >>> transforms = [audio.InverseMelScale(20, 3, 16000, 0, 8000, 10)]
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
    """

    @check_inverse_mel_scale
    def __init__(self, n_stft, n_mels=128, sample_rate=16000, f_min=0.0, f_max=None, max_iter=100000,
                 tolerance_loss=1e-5, tolerance_change=1e-8, sgdargs=None, norm=NormType.NONE, mel_type=MelType.HTK):
        # An omitted f_max falls back to half of the sample rate (integer division).
        if f_max is None:
            f_max = sample_rate // 2
        # An omitted sgdargs falls back to the documented optimizer settings.
        if sgdargs is None:
            sgdargs = {'sgd_lr': 0.1, 'sgd_momentum': 0.9}

        # Filter bank geometry.
        self.n_stft = n_stft
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        self.f_min = f_min
        self.f_max = f_max
        self.norm = norm
        self.mel_type = mel_type

        # Optimizer settings.
        self.max_iter = max_iter
        self.tolerance_loss = tolerance_loss
        self.tolerance_change = tolerance_change
        self.sgdargs = sgdargs

    def parse(self):
        # Translate the Python enums into their C++ counterparts before building the operation.
        c_norm = DE_C_NORM_TYPE[self.norm]
        c_mel_type = DE_C_MEL_TYPE[self.mel_type]
        return cde.InverseMelScaleOperation(self.n_stft, self.n_mels, self.sample_rate, self.f_min, self.f_max,
                                            self.max_iter, self.tolerance_loss, self.tolerance_change, self.sgdargs,
                                            c_norm, c_mel_type)
|
||||
|
||||
|
||||
class LFilter(AudioTensorOperation):
|
||||
"""
|
||||
Design two-pole filter for audio waveform of dimension of (..., time).
|
||||
|
|
|
@ -286,6 +286,87 @@ def check_gain(method):
|
|||
return new_method
|
||||
|
||||
|
||||
def check_mel_scale_n_mels(n_mels):
    """Validate n_mels: it must be an int that passes the positive int32 check."""
    arg_name = "n_mels"
    type_check(n_mels, (int,), arg_name)
    check_pos_int32(n_mels, arg_name)
|
||||
|
||||
|
||||
def check_mel_scale_sample_rate(sample_rate):
    """Validate sample_rate: it must be an int that passes the positive int32 check."""
    arg_name = "sample_rate"
    type_check(sample_rate, (int,), arg_name)
    check_pos_int32(sample_rate, arg_name)
|
||||
|
||||
|
||||
def check_mel_scale_freq(f_min, f_max, sample_rate):
    """
    Validate the frequency bounds f_min and f_max.

    f_min must be an int or float accepted by check_float32. When f_max is given it must be an int or
    float accepted by check_pos_float32 and strictly greater than f_min. When f_max is None, f_min
    must be strictly less than sample_rate // 2.

    Raises:
        ValueError: If f_min is not below the effective upper bound.
    """
    type_check(f_min, (int, float), "f_min")
    check_float32(f_min, "f_min")

    if f_max is None:
        # No explicit upper bound: compare against half of the sample rate (integer division).
        half_rate = sample_rate // 2
        if half_rate <= f_min:
            raise ValueError("MelScale: sample_rate // 2 should be greater than f_min when f_max is set to None.")
        return

    type_check(f_max, (int, float), "f_max")
    check_pos_float32(f_max, "f_max")
    if f_max <= f_min:
        raise ValueError("MelScale: f_max should be greater than f_min.")
|
||||
|
||||
|
||||
def check_mel_scale_n_stft(n_stft):
    """Validate n_stft: it must be an int that passes the positive int32 check."""
    arg_name = "n_stft"
    type_check(n_stft, (int,), arg_name)
    check_pos_int32(n_stft, arg_name)
|
||||
|
||||
|
||||
def check_mel_scale_norm(norm):
    """Validate norm: it must be a NormType enum member."""
    accepted_types = (NormType,)
    type_check(norm, accepted_types, "norm")
|
||||
|
||||
|
||||
def check_mel_scale_mel_type(mel_type):
    """Validate mel_type: it must be a MelType enum member."""
    accepted_types = (MelType,)
    type_check(mel_type, accepted_types, "mel_type")
|
||||
|
||||
|
||||
def check_inverse_mel_scale(method):
    """
    Wrapper method to check the parameters of InverseMelScale.

    The mel-scale parameters shared with MelScale (n_mels, sample_rate, f_min/f_max, n_stft, norm,
    mel_type) are delegated to the check_mel_scale_* helpers. The optimizer parameters are
    validated here:

    - max_iter must be an int that passes the positive int32 check.
    - tolerance_loss and tolerance_change must be int or float and pass the positive float32 check.
    - sgdargs, when given, must be a dict holding the keys "sgd_lr" and "sgd_momentum", each an
      int or float that passes the non-negative float32 check.

    Raises:
        TypeError: If an argument (or sgdargs itself) has an unsupported type.
        ValueError: If a numeric argument is outside its accepted interval.
        KeyError: If sgdargs lacks "sgd_lr" or "sgd_momentum".
    """

    @wraps(method)
    def new_method(self, *args, **kwargs):
        [n_stft, n_mels, sample_rate, f_min, f_max, max_iter, tolerance_loss, tolerance_change, sgdargs, norm,
         mel_type], _ = parse_user_args(method, *args, **kwargs)
        check_mel_scale_n_mels(n_mels)
        check_mel_scale_sample_rate(sample_rate)
        check_mel_scale_freq(f_min, f_max, sample_rate)
        check_mel_scale_n_stft(n_stft)
        check_mel_scale_norm(norm)
        check_mel_scale_mel_type(mel_type)

        type_check(max_iter, (int,), "max_iter")
        check_pos_int32(max_iter, "max_iter")

        type_check(tolerance_loss, (int, float), "tolerance_loss")
        check_pos_float32(tolerance_loss, "tolerance_loss")

        type_check(tolerance_change, (int, float), "tolerance_change")
        check_pos_float32(tolerance_change, "tolerance_change")

        if sgdargs is not None:
            # Reject non-dict containers up front so the caller gets the standard type_check message
            # instead of an indexing error raised by the key lookups below.
            type_check(sgdargs, (dict,), "sgdargs")

            required_keys = ("sgd_lr", "sgd_momentum")
            # Both keys are looked up before any value is validated, matching the original order;
            # a missing key still raises KeyError, now with a message that names the problem.
            for key in required_keys:
                if key not in sgdargs:
                    raise KeyError("InverseMelScale: sgdargs should contain the key '{}'.".format(key))

            for key in required_keys:
                type_check(sgdargs[key], (int, float), key)
                check_non_negative_float32(sgdargs[key], key)

        return method(self, *args, **kwargs)

    return new_method
|
||||
|
||||
|
||||
def check_lfilter(method):
|
||||
"""Wrapper method to check the parameters of LFilter."""
|
||||
|
||||
|
@ -519,31 +600,12 @@ def check_mel_scale(method):
|
|||
@wraps(method)
|
||||
def new_method(self, *args, **kwargs):
|
||||
[n_mels, sample_rate, f_min, f_max, n_stft, norm, mel_type], _ = parse_user_args(method, *args, **kwargs)
|
||||
|
||||
type_check(n_mels, (int,), "n_mels")
|
||||
check_pos_int32(n_mels, "n_mels")
|
||||
|
||||
type_check(sample_rate, (int,), "sample_rate")
|
||||
check_pos_int32(sample_rate, "sample_rate")
|
||||
|
||||
type_check(f_min, (int, float), "f_min")
|
||||
check_float32(f_min, "f_min")
|
||||
|
||||
if f_max is not None:
|
||||
type_check(f_max, (int, float), "f_max")
|
||||
check_pos_float32(f_max, "f_max")
|
||||
if f_min >= f_max:
|
||||
raise ValueError("MelScale: f_max should be greater than f_min.")
|
||||
else:
|
||||
if f_min >= sample_rate // 2:
|
||||
raise ValueError("MelScale: sample_rate // 2 should be greater than f_min when f_max is set to None.")
|
||||
|
||||
type_check(n_stft, (int,), "n_stft")
|
||||
check_pos_int32(n_stft, "n_stft")
|
||||
|
||||
type_check(norm, (NormType,), "norm")
|
||||
|
||||
type_check(mel_type, (MelType,), "mel_type")
|
||||
check_mel_scale_n_mels(n_mels)
|
||||
check_mel_scale_sample_rate(sample_rate)
|
||||
check_mel_scale_freq(f_min, f_max, sample_rate)
|
||||
check_mel_scale_n_stft(n_stft)
|
||||
check_mel_scale_norm(norm)
|
||||
check_mel_scale_mel_type(mel_type)
|
||||
|
||||
return method(self, *args, **kwargs)
|
||||
|
||||
|
|
|
@ -936,6 +936,164 @@ TEST_F(MindDataTestPipeline, TestHighpassBiquadWrongArgs) {
|
|||
EXPECT_EQ(iter02, nullptr);
|
||||
}
|
||||
|
||||
/// Feature: InverseMelScale
|
||||
/// Description: test basic usage of InverseMelScale
|
||||
/// Expectation: get correct number of data
|
||||
TEST_F(MindDataTestPipeline, TestInverseMelScalePipeline) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestInverseMelScalePipeline.";
|
||||
// Original waveform
|
||||
std::shared_ptr<SchemaObj> schema = Schema();
|
||||
ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {4, 3, 7}));
|
||||
std::shared_ptr<Dataset> ds = RandomData(10, schema);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
ds = ds->SetNumWorkers(4);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
auto inverse_mel_scale_op1 = audio::InverseMelScale(20, 3, 16000, 0, 8000, 10);
|
||||
ds = ds->Map({inverse_mel_scale_op1});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
EXPECT_NE(ds, nullptr);
|
||||
std::unordered_map<std::string, mindspore::MSTensor> row;
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
std::vector<int64_t> expected = {4, 20, 7};
|
||||
int i = 0;
|
||||
while (row.size() != 0) {
|
||||
auto col = row["waveform"];
|
||||
ASSERT_EQ(col.Shape(), expected);
|
||||
ASSERT_EQ(col.Shape().size(), 3);
|
||||
ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
i++;
|
||||
}
|
||||
EXPECT_EQ(i, 10);
|
||||
iter->Stop();
|
||||
|
||||
std::shared_ptr<SchemaObj> schema2 = Schema();
|
||||
ASSERT_OK(schema2->add_column("waveform", mindspore::DataType::kNumberTypeFloat64, {10, 20, 30}));
|
||||
ds = RandomData(10, schema2);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
auto inverse_mel_scale_op2 = audio::InverseMelScale(128, 20, 16000, 0, 8000, 100);
|
||||
ds = ds->Map({inverse_mel_scale_op2});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
iter = ds->CreateIterator();
|
||||
EXPECT_NE(ds, nullptr);
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
expected = {10, 128, 30};
|
||||
i = 0;
|
||||
while (row.size() != 0) {
|
||||
auto col = row["waveform"];
|
||||
ASSERT_EQ(col.Shape(), expected);
|
||||
ASSERT_EQ(col.Shape().size(), 3);
|
||||
ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat64);
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
i++;
|
||||
}
|
||||
EXPECT_EQ(i, 10);
|
||||
iter->Stop();
|
||||
|
||||
std::shared_ptr<SchemaObj> schema3 = Schema();
|
||||
ASSERT_OK(schema3->add_column("waveform", mindspore::DataType::kNumberTypeInt16, {3, 4, 5}));
|
||||
ds = RandomData(10, schema3);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
auto inverse_mel_scale_op3 = audio::InverseMelScale(128, 4, 16000, 0, 8000, 100);
|
||||
ds = ds->Map({inverse_mel_scale_op3});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
iter = ds->CreateIterator();
|
||||
EXPECT_NE(ds, nullptr);
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
expected = {3, 128, 5};
|
||||
i = 0;
|
||||
while (row.size() != 0) {
|
||||
auto col = row["waveform"];
|
||||
ASSERT_EQ(col.Shape(), expected);
|
||||
ASSERT_EQ(col.Shape().size(), 3);
|
||||
ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
i++;
|
||||
}
|
||||
EXPECT_EQ(i, 10);
|
||||
iter->Stop();
|
||||
|
||||
std::shared_ptr<SchemaObj> schema4 = Schema();
|
||||
ASSERT_OK(schema4->add_column("waveform", mindspore::DataType::kNumberTypeInt16, {4, 20}));
|
||||
ds = RandomData(10, schema4);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
auto inverse_mel_scale_op4 = audio::InverseMelScale(20, 4, 16000, 0, 8000, 100);
|
||||
ds = ds->Map({inverse_mel_scale_op4});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
iter = ds->CreateIterator();
|
||||
EXPECT_NE(ds, nullptr);
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
expected = {1, 20, 20};
|
||||
i = 0;
|
||||
while (row.size() != 0) {
|
||||
auto col = row["waveform"];
|
||||
ASSERT_EQ(col.Shape(), expected);
|
||||
ASSERT_EQ(col.Shape().size(), 3);
|
||||
ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
i++;
|
||||
}
|
||||
EXPECT_EQ(i, 10);
|
||||
iter->Stop();
|
||||
}
|
||||
|
||||
/// Feature: InverseMelScale
|
||||
/// Description: test WrongArg of InverseMelScale
|
||||
/// Expectation: return error
|
||||
TEST_F(MindDataTestPipeline, TestInverseMelScaleWrongArgs) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestInverseMelScaleWrongArgs.";
|
||||
// MelScale: f_max must be greater than f_min.
|
||||
std::shared_ptr<SchemaObj> schema = Schema();
|
||||
ASSERT_OK(schema->add_column("waveform", mindspore::DataType::kNumberTypeFloat32, {3, 4, 5}));
|
||||
std::shared_ptr<Dataset> ds = RandomData(50, schema);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
ds = ds->SetNumWorkers(4);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
auto inverse_mel_scale_op = audio::InverseMelScale(128, 4, 1000, -100, -100);
|
||||
ds = ds->Map({inverse_mel_scale_op});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
|
||||
// MelScale: n_mels must be greater than 0.
|
||||
inverse_mel_scale_op = audio::InverseMelScale(-128, 16000, 1000, 10, 100);
|
||||
ds = ds->Map({inverse_mel_scale_op});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
iter = ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
|
||||
// MelScale: sample_rate must be greater than f_min.
|
||||
inverse_mel_scale_op = audio::InverseMelScale(128, -16000, 1000, 10, 100);
|
||||
ds = ds->Map({inverse_mel_scale_op});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
iter = ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
|
||||
// MelScale: max_iter must be greater than 0.
|
||||
inverse_mel_scale_op = audio::InverseMelScale(128, 16000, 1000, 10, 100, -10);
|
||||
ds = ds->Map({inverse_mel_scale_op});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
iter = ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
|
||||
// MelScale: tolerance_loss must be greater than 0.
|
||||
inverse_mel_scale_op = audio::InverseMelScale(128, 16000, 1000, 10, 100, 10, -10);
|
||||
ds = ds->Map({inverse_mel_scale_op});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
iter = ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
|
||||
// MelScale: tolerance_change must be greater than 0.
|
||||
inverse_mel_scale_op = audio::InverseMelScale(128, 16000, 1000, 10, 100, 10, 10, -10);
|
||||
ds = ds->Map({inverse_mel_scale_op});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
iter = ds->CreateIterator();
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
/// Feature: MelscaleFbanks.
|
||||
/// Description: Test normal operation.
|
||||
/// Expectation: As expected.
|
||||
|
|
|
@ -1000,6 +1000,29 @@ TEST_F(MindDataTestExecute, TestHighpassBiquadParamCheckSampleRate) {
|
|||
ASSERT_FALSE(rc.IsOk());
|
||||
}
|
||||
|
||||
// Feature: InverseMelScale
|
||||
// Description: test InverseMelScale in eager mode
|
||||
// Expectation: the data is processed successfully
|
||||
TEST_F(MindDataTestExecute, TestInverseMelScale) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestExecute-TestInverseMelScale.";
|
||||
// Original waveform
|
||||
std::vector<float> labels = {
|
||||
2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
|
||||
1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
|
||||
1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
|
||||
1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
|
||||
1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03,
|
||||
1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
|
||||
std::shared_ptr<Tensor> input;
|
||||
ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 2, 3, 2}), &input));
|
||||
auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
|
||||
std::shared_ptr<TensorTransform> inverse_mel_op = std::make_shared<audio::InverseMelScale>(20, 3, 16000, 0, 8000, 10);
|
||||
// apply inverse mel scale
|
||||
mindspore::dataset::Execute trans({inverse_mel_op});
|
||||
Status status = trans(input_ms, &input_ms);
|
||||
EXPECT_TRUE(status.IsOk());
|
||||
}
|
||||
|
||||
TEST_F(MindDataTestExecute, TestMuLawDecodingEager) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestExecute-TestMuLawDecodingEager.";
|
||||
// testing
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,155 @@
|
|||
# Copyright 2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Testing InverseMelScale op in DE
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.dataset.audio.transforms as c_audio
|
||||
from mindspore import log as logger
|
||||
from mindspore.dataset.audio.utils import MelType, NormType
|
||||
|
||||
DATA_DIR = "../data/dataset/audiorecord/"
|
||||
|
||||
|
||||
def get_ratio(mat):
    """Return the sum of the elements of mat divided by its number of elements."""
    total = mat.sum()
    count = mat.size
    return total / count
|
||||
|
||||
|
||||
def test_inverse_mel_scale_pipeline():
    """
    Feature: InverseMelScale
    Description: test InverseMelScale cpp op in pipeline
    Expectation: equal results from Mindspore and benchmark
    """
    # Each scenario: (input file, expected-output file, InverseMelScale keyword arguments).
    scenarios = [
        ("inverse_mel_scale_8x40.npy", 'inverse_mel_scale_20x40_out.npy',
         dict(n_stft=20, n_mels=8, sample_rate=8000, sgdargs={'sgd_lr': 0.05, 'sgd_momentum': 0.9})),
        ("inverse_mel_scale_4x80.npy", 'inverse_mel_scale_40x80_out.npy',
         dict(n_stft=40, n_mels=4, sgdargs={'sgd_lr': 0.01, 'sgd_momentum': 0.9})),
        ("inverse_mel_scale_4x160.npy", 'inverse_mel_scale_40x160_out.npy',
         dict(n_stft=40, n_mels=4, f_min=10, sgdargs={'sgd_lr': 0.1, 'sgd_momentum': 0.8})),
    ]
    column = "multi_dimensional_data"

    for input_file, expected_file, op_kwargs in scenarios:
        # A leading axis is added so that the whole array forms a single dataset row.
        in_data = np.load(DATA_DIR + input_file)[np.newaxis, :]
        out_expect = np.load(DATA_DIR + expected_file)[np.newaxis, :]
        dataset = ds.NumpySlicesDataset(in_data, column_names=[column], shuffle=False)
        dataset = dataset.map(operations=[c_audio.InverseMelScale(**op_kwargs)], input_columns=[column])
        for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
            out_data = item[column]
            # epsilon is added to the denominator of the relative difference.
            epsilon = 1e-60
            relative_diff = np.abs((out_data - out_expect) / (out_expect + epsilon))
            # Passes when more than 1% of the elements are within 10% relative difference.
            assert get_ratio(relative_diff < 1e-1) > 1e-2
|
||||
|
||||
|
||||
def test_inverse_mel_scale_pipeline_invalid_param():
    """
    Feature: InverseMelScale
    Description: test InverseMelScale with invalid input parameters
    Expectation: throw ValueError or TypeError
    """
    logger.info("test InverseMelScale op with invalid parameters")
    # Parameter validation runs in the decorator on InverseMelScale.__init__, so constructing the
    # operation is what raises. The dataset map/iteration statements that used to follow each
    # constructor were never reached and are dropped, along with the data file they required.
    # Each entry: (expected exception, regex for its message, constructor keyword arguments).
    invalid_cases = [
        # f_min and f_max
        (ValueError, "MelScale: f_max should be greater than f_min.",
         dict(n_mels=20, n_stft=128, sample_rate=16200, f_min=1000, f_max=1000)),
        # n_mel
        (ValueError, r"Input n_mels is not within the required interval of \[1, 2147483647\].",
         dict(n_mels=-1, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000)),
        # sample_rate
        (ValueError, r"Input sample_rate is not within the required interval of \[1, 2147483647\].",
         dict(n_mels=128, n_stft=2000, sample_rate=0, f_min=10, f_max=1000)),
        # f_max
        (ValueError, r"Input f_max is not within the required interval of \(0, 16777216\].",
         dict(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=-10)),
        # norm
        (TypeError, r"Argument norm with value slaney is not of type \[<enum 'NormType'>\], " +
         "but got <class 'str'>.",
         dict(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
              norm="slaney", mel_type=MelType.SLANEY)),
        # mel_type
        (TypeError, r"Argument mel_type with value SLANEY is not of type \[<enum 'MelType'>\], " +
         "but got <class 'str'>.",
         dict(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
              norm=NormType.NONE, mel_type="SLANEY")),
        # max_iter
        (ValueError, r"Input max_iter is not within the required interval of \[1, 2147483647\].",
         dict(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
              norm=NormType.NONE, mel_type=MelType.SLANEY, max_iter=-10)),
        # tolerance_loss
        (ValueError, r"Input tolerance_loss is not within the required interval of \(0, 16777216\].",
         dict(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
              norm=NormType.NONE, mel_type=MelType.SLANEY, tolerance_loss=-10)),
        # tolerance_change
        (ValueError, r"Input tolerance_change is not within the required interval of \(0, 16777216\].",
         dict(n_mels=128, n_stft=2000, sample_rate=16200, f_min=10, f_max=1000,
              norm=NormType.NONE, mel_type=MelType.SLANEY, tolerance_change=-10)),
    ]

    for expected_error, message_pattern, op_kwargs in invalid_cases:
        with pytest.raises(expected_error, match=message_pattern):
            c_audio.InverseMelScale(**op_kwargs)
|
||||
|
||||
|
||||
def test_inverse_mel_scale_eager():
    """
    Feature: InverseMelScale
    Description: test InverseMelScale cpp op with eager mode
    Expectation: equal results from Mindspore and benchmark
    """
    mel_spectrogram = np.load(DATA_DIR + 'inverse_mel_scale_32x81.npy')
    inverse_mel_scale = c_audio.InverseMelScale(n_stft=80, n_mels=32)
    out_ms = inverse_mel_scale(mel_spectrogram)
    out_expect = np.load(DATA_DIR + 'inverse_mel_scale_80x81_out.npy')

    # epsilon is added to the denominator of the relative difference.
    epsilon = 1e-60
    relative_diff = np.abs((out_ms - out_expect) / (out_expect + epsilon))
    # More than 1% of the elements must be within 10% relative difference ...
    assert get_ratio(relative_diff < 1e-1) > 1e-2
    # ... and more than 0.1% of the elements within 0.1% relative difference.
    assert get_ratio(relative_diff < 1e-3) > 1e-3
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test in this module, in declaration order, when executed as a script.
    for run_test in (test_inverse_mel_scale_pipeline,
                     test_inverse_mel_scale_pipeline_invalid_param,
                     test_inverse_mel_scale_eager):
        run_test()
|
Loading…
Reference in New Issue