forked from mindspore-Ecosystem/mindspore
!22893 [assistant][ops] Add new data operator CreateFbMatrix
Merge pull request !22893 from TR-nbu/CreateFbMatrix
This commit is contained in:
commit
19a9ad6f38
|
@ -464,6 +464,30 @@ Magphase::Magphase(float power) : data_(std::make_shared<Data>(power)) {}
|
|||
|
||||
std::shared_ptr<TensorOperation> Magphase::Parse() { return std::make_shared<MagphaseOperation>(data_->power_); }
|
||||
|
||||
// MelscaleFbanks Function.
|
||||
// Validates the arguments, builds the filterbank with CreateFbanks and wraps the result in an MSTensor.
// The checks run in a fixed order, so the first violated constraint determines the returned message.
Status MelscaleFbanks(MSTensor *output, int32_t n_freqs, float f_min, float f_max, int32_t n_mels, int32_t sample_rate,
                      NormType norm, MelType mel_type) {
  RETURN_UNEXPECTED_IF_NULL(output);

  const bool n_freqs_positive = n_freqs > 0;
  CHECK_FAIL_RETURN_UNEXPECTED(n_freqs_positive,
                               "MelscaleFbanks: n_freqs must be greater than 0, got: " + std::to_string(n_freqs));

  const bool f_min_non_negative = f_min >= 0;
  CHECK_FAIL_RETURN_UNEXPECTED(f_min_non_negative,
                               "MelscaleFbanks: f_min must be non negative, got: " + std::to_string(f_min));

  const bool f_max_positive = f_max > 0;
  CHECK_FAIL_RETURN_UNEXPECTED(f_max_positive,
                               "MelscaleFbanks: f_max must be greater than 0, got: " + std::to_string(f_max));

  const bool n_mels_positive = n_mels > 0;
  CHECK_FAIL_RETURN_UNEXPECTED(n_mels_positive,
                               "MelscaleFbanks: n_mels must be greater than 0, got: " + std::to_string(n_mels));

  const bool sample_rate_positive = sample_rate > 0;
  CHECK_FAIL_RETURN_UNEXPECTED(
    sample_rate_positive, "MelscaleFbanks: sample_rate must be greater than 0, got: " + std::to_string(sample_rate));

  const bool range_is_ordered = f_max > f_min;
  CHECK_FAIL_RETURN_UNEXPECTED(range_is_ordered, "MelscaleFbanks: f_max must be greater than f_min, got: f_min = " +
                                                   std::to_string(f_min) + ", while f_max = " + std::to_string(f_max));

  // Delegate the computation, then make sure something was actually produced.
  std::shared_ptr<dataset::Tensor> filterbank;
  RETURN_IF_NOT_OK(CreateFbanks(&filterbank, n_freqs, f_min, f_max, n_mels, sample_rate, norm, mel_type));
  CHECK_FAIL_RETURN_UNEXPECTED(filterbank->HasData(),
                               "MelscaleFbanks: get an empty tensor with shape " + filterbank->shape().ToString());

  // Hand the dataset tensor to the caller through the public MSTensor type.
  auto de_tensor = std::make_shared<DETensor>(filterbank);
  *output = mindspore::MSTensor(de_tensor);
  return Status::OK();
}
|
||||
|
||||
// MuLawDecoding Transform Operation.
|
||||
struct MuLawDecoding::Data {
|
||||
explicit Data(int32_t quantization_channels) : quantization_channels_(quantization_channels) {}
|
||||
|
|
|
@ -30,6 +30,30 @@ PYBIND_REGISTER(CreateDct, 1, ([](py::module *m) {
|
|||
}));
|
||||
}));
|
||||
|
||||
PYBIND_REGISTER(MelscaleFbanks, 1, ([](py::module *m) {
                  // Python entry point "MelscaleFbanks": forwards to CreateFbanks and returns the tensor it fills,
                  // turning a failing Status into an exception through THROW_IF_ERROR.
                  auto create_fbanks = [](int32_t n_freqs, float f_min, float f_max, int32_t n_mels,
                                          int32_t sample_rate, NormType norm, MelType mel_type) {
                    std::shared_ptr<Tensor> filterbank;
                    THROW_IF_ERROR(
                      CreateFbanks(&filterbank, n_freqs, f_min, f_max, n_mels, sample_rate, norm, mel_type));
                    return filterbank;
                  };
                  (void)m->def("MelscaleFbanks", create_fbanks);
                }));
|
||||
|
||||
PYBIND_REGISTER(MelType, 0, ([](const py::module *m) {
                  // Expose MelType to Python as an arithmetic enum and export its values into the module scope.
                  py::enum_<MelType> mel_type_enum(*m, "MelType", py::arithmetic());
                  (void)mel_type_enum.value("DE_MELTYPE_HTK", MelType::kHtk);
                  (void)mel_type_enum.value("DE_MELTYPE_SLANEY", MelType::kSlaney);
                  (void)mel_type_enum.export_values();
                }));
|
||||
|
||||
PYBIND_REGISTER(NormType, 0, ([](const py::module *m) {
                  // Expose NormType to Python as an arithmetic enum and export its values into the module scope.
                  py::enum_<NormType> norm_type_enum(*m, "NormType", py::arithmetic());
                  (void)norm_type_enum.value("DE_NORMTYPE_NONE", NormType::kNone);
                  (void)norm_type_enum.value("DE_NORMTYPE_SLANEY", NormType::kSlaney);
                  (void)norm_type_enum.export_values();
                }));
|
||||
|
||||
PYBIND_REGISTER(NormMode, 0, ([](const py::module *m) {
|
||||
(void)py::enum_<NormMode>(*m, "NormMode", py::arithmetic())
|
||||
.value("DE_NORMMODE_NONE", NormMode::kNone)
|
||||
|
|
|
@ -222,6 +222,170 @@ Status Phase(const std::shared_ptr<Tensor> &angle_0, const std::shared_ptr<Tenso
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Status CreateTriangularFilterbank(std::shared_ptr<Tensor> *output, const std::shared_ptr<Tensor> &all_freqs,
|
||||
const std::shared_ptr<Tensor> &f_pts) {
|
||||
// calculate the difference between each mel point and each stft freq point in hertz.
|
||||
std::vector<T> f_diff;
|
||||
auto iter_fpts1 = f_pts->begin<T>();
|
||||
auto iter_fpts2 = f_pts->begin<T>();
|
||||
++iter_fpts2;
|
||||
for (size_t i = 1; i < f_pts->Size(); i++) {
|
||||
f_diff.push_back(*iter_fpts2 - *iter_fpts1);
|
||||
++iter_fpts2;
|
||||
++iter_fpts1;
|
||||
}
|
||||
|
||||
std::vector<T> slopes;
|
||||
TensorShape slopes_shape({all_freqs->Size(), f_pts->Size()});
|
||||
auto iter_all_freq = all_freqs->begin<T>();
|
||||
for (; iter_all_freq != all_freqs->end<T>(); ++iter_all_freq) {
|
||||
auto iter_f_pts = f_pts->begin<T>();
|
||||
for (; iter_f_pts != f_pts->end<T>(); ++iter_f_pts) {
|
||||
slopes.push_back(*iter_f_pts - *iter_all_freq);
|
||||
}
|
||||
}
|
||||
|
||||
// calculate up and down slopes for creating overlapping triangles.
|
||||
std::vector<T> down_slopes;
|
||||
TensorShape down_slopes_shape({all_freqs->Size(), f_pts->Size() - 2});
|
||||
for (size_t row = 0; row < down_slopes_shape[0]; row++)
|
||||
for (size_t col = 0; col < down_slopes_shape[1]; col++) {
|
||||
down_slopes.push_back(-slopes[col + row * f_pts->Size()] / f_diff[col]);
|
||||
}
|
||||
std::vector<T> up_slopes;
|
||||
TensorShape up_slopes_shape({all_freqs->Size(), f_pts->Size() - 2});
|
||||
for (size_t row = 0; row < up_slopes_shape[0]; row++)
|
||||
for (size_t col = 2; col < f_pts->Size(); col++) {
|
||||
up_slopes.push_back(slopes[col + row * f_pts->Size()] / f_diff[col - 1]);
|
||||
}
|
||||
|
||||
// clip the value of triangles and save into fb.
|
||||
std::vector<T> fb;
|
||||
TensorShape fb_shape({all_freqs->Size(), f_pts->Size() - 2});
|
||||
for (size_t i = 0; i < down_slopes.size(); i++) {
|
||||
fb.push_back(std::max(0.0f, std::min(down_slopes[i], up_slopes[i])));
|
||||
}
|
||||
|
||||
std::shared_ptr<Tensor> fb_tensor;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(fb, fb_shape, &fb_tensor));
|
||||
*output = fb_tensor;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
/// \brief Create a frequency transformation matrix with shape (n_freqs, n_mels).
/// \param[out] output Tensor of the frequency transformation matrix.
/// \param[in] n_freqs Number of points requested from Linspace between 0 and sample_rate / 2.
/// \param[in] f_min Minimum frequency in Hz.
/// \param[in] f_max Maximum frequency in Hz.
/// \param[in] n_mels Number of mel filterbanks, must be greater than 0.
/// \param[in] sample_rate Sample rate.
/// \param[in] norm NormType::kSlaney scales filter k by 2 / (f_pts[k + 2] - f_pts[k]); otherwise no scaling is done.
/// \param[in] mel_type MelType::kHtk selects the HTK formula; any other value selects the Slaney formula.
/// \return Status code; an error is returned when at least one filter is zero everywhere.
Status CreateFbanks(std::shared_ptr<Tensor> *output, int32_t n_freqs, float f_min, float f_max, int32_t n_mels,
                    int32_t sample_rate, NormType norm, MelType mel_type) {
  CHECK_FAIL_RETURN_UNEXPECTED(output != nullptr, "MelscaleFbanks: output must not be null.");
  // n_mels + 2 edge points are generated below; a non-positive n_mels would make the filter count negative.
  CHECK_FAIL_RETURN_UNEXPECTED(n_mels > 0,
                               "MelscaleFbanks: n_mels must be greater than 0, got: " + std::to_string(n_mels));

  // Constants of the Slaney mel scale: linear below min_log_hz, logarithmic above it.
  const double f_sp = 200.0 / 3;
  const double min_log_hz = 1000.0;
  const double min_log_mel = min_log_hz / f_sp;
  // Natural logarithm, so that exp() in the mel -> Hz direction is the exact inverse of log() in Hz -> mel.
  const double logstep = log(6.4) / 27.0;

  // Constants of the HTK mel scale: mel = 2595 * log10(1 + hz / 700).
  const double hz_to_mel_c = 2595.0;
  const double mel_to_hz_c = 700.0;
  const double htk_mel_c = 10.0;

  const auto hz_to_mel = [&](double hz) -> double {
    if (mel_type == MelType::kHtk) {
      return hz_to_mel_c * log10(1.0 + (hz / mel_to_hz_c));
    }
    // The switch to the logarithmic region is decided by the frequency in Hz, not by the mel value.
    if (hz >= min_log_hz) {
      return min_log_mel + log(hz / min_log_hz) / logstep;
    }
    return hz / f_sp;
  };
  const auto mel_to_hz = [&](double mel) -> double {
    if (mel_type == MelType::kHtk) {
      return mel_to_hz_c * (pow(htk_mel_c, mel / hz_to_mel_c) - 1.0);
    }
    if (mel >= min_log_mel) {
      return min_log_hz * exp(logstep * (mel - min_log_mel));
    }
    return f_sp * mel;
  };

  // all_freqs holds the frequencies the filters are evaluated at.
  std::shared_ptr<Tensor> all_freqs;
  // the sampling frequency is at least twice the highest frequency of the signal.
  const double signal_times = 2;
  RETURN_IF_NOT_OK(Linspace<float>(&all_freqs, 0, sample_rate / signal_times, n_freqs));

  // m_pts is the mel value sequence produced by Linspace between m_min and m_max.
  const double m_min = hz_to_mel(f_min);
  const double m_max = hz_to_mel(f_max);
  std::shared_ptr<Tensor> m_pts;
  const int32_t bias = 2;
  RETURN_IF_NOT_OK(Linspace<float>(&m_pts, m_min, m_max, n_mels + bias));
  CHECK_FAIL_RETURN_UNEXPECTED(m_pts->Size() > bias, "MelscaleFbanks: failed to generate the mel points, got " +
                                                       std::to_string(m_pts->Size()) + " points.");

  // f_pts holds the same points converted back to hertz.
  std::shared_ptr<Tensor> f_pts;
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(m_pts->shape(), DataType(DataType::DE_FLOAT32), &f_pts));
  auto iter_f = f_pts->begin<float>();
  for (auto iter_m = m_pts->begin<float>(); iter_m != m_pts->end<float>(); ++iter_m, ++iter_f) {
    *iter_f = static_cast<float>(mel_to_hz(*iter_m));
  }

  // create filterbank
  std::shared_ptr<Tensor> fb;
  RETURN_IF_NOT_OK(CreateTriangularFilterbank<float>(&fb, all_freqs, f_pts));
  const size_t n_cols = static_cast<size_t>(f_pts->Size() - bias);

  // normalize with Slaney: scale every column by 2 / (upper edge - lower edge).
  if (norm == NormType::kSlaney) {
    std::vector<float> pts;
    pts.reserve(n_cols + bias);
    for (auto it = f_pts->begin<float>(); it != f_pts->end<float>(); ++it) {
      pts.push_back(*it);
    }
    std::vector<float> enorm;
    enorm.reserve(n_cols);
    for (size_t col = 0; col < n_cols; ++col) {
      enorm.push_back(2.0f / (pts[col + bias] - pts[col]));
    }
    size_t idx = 0;
    for (auto it = fb->begin<float>(); it != fb->end<float>(); ++it, ++idx) {
      *it = (*it) * enorm[idx % n_cols];
    }
  }

  // anomaly detection: every filter (column) must be non-zero for at least one frequency.
  std::vector<float> max_val(n_cols, 0);
  size_t idx = 0;
  for (auto it = fb->begin<float>(); it != fb->end<float>(); ++it, ++idx) {
    const size_t col = idx % n_cols;
    max_val[col] = std::max(max_val[col], *it);
  }
  for (size_t col = 0; col < n_cols; ++col) {
    CHECK_FAIL_RETURN_UNEXPECTED(
      max_val[col] >= 1e-8,
      "MelscaleFbanks: at least one mel filterbank is all zeros, check if the value for 'n_mels' " +
        std::to_string(n_mels) + " is set too high or the value for 'n_freqs' " + std::to_string(n_freqs) +
        " is set too low.");
  }

  *output = fb;
  return Status::OK();
}
|
||||
|
||||
/// \brief Calculate magnitude.
|
||||
/// \param[in] alphas - The alphas.
|
||||
/// \param[in] abs_0 - The norm.
|
||||
|
|
|
@ -380,6 +380,19 @@ Status RandomMaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr
|
|||
Status MaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t mask_width,
|
||||
int32_t mask_start, float mask_value, int32_t axis);
|
||||
|
||||
/// \brief Create a frequency transformation matrix with shape (n_freqs, n_mels).
|
||||
/// \param output Tensor of the frequency transformation matrix.
|
||||
/// \param n_freqs: Number of frequency.
|
||||
/// \param f_min: Minimum of frequency in Hz.
|
||||
/// \param f_max: Maximum of frequency in Hz.
|
||||
/// \param n_mels: Number of mel filterbanks.
|
||||
/// \param sample_rate: Sample rate.
|
||||
/// \param norm: Norm to use, can be NormType::kSlaney or NormType::kNone.
|
||||
/// \param mel_type: Scale to use, can be MelType::kSlaney or MelType::kHtk.
|
||||
/// \return Status code.
|
||||
Status CreateFbanks(std::shared_ptr<Tensor> *output, int32_t n_freqs, float f_min, float f_max, int32_t n_mels,
|
||||
int32_t sample_rate, NormType norm, MelType mel_type);
|
||||
|
||||
/// \brief Create a DCT transformation matrix with shape (n_mels, n_mfcc), normalized depending on norm.
|
||||
/// \param n_mfcc: Number of mfc coefficients to retain, the value must be greater than 0.
|
||||
/// \param n_mels: Number of mel filterbanks, the value must be greater than 0.
|
||||
|
|
|
@ -616,6 +616,19 @@ class MS_API Magphase final : public TensorTransform {
|
|||
std::shared_ptr<Data> data_;
|
||||
};
|
||||
|
||||
/// \brief Create a frequency transformation matrix with shape (n_freqs, n_mels).
|
||||
/// \param[out] output Tensor of the frequency transformation matrix.
|
||||
/// \param[in] n_freqs Number of frequencies to highlight/apply.
|
||||
/// \param[in] f_min Minimum frequency (Hz).
|
||||
/// \param[in] f_max Maximum frequency (Hz).
|
||||
/// \param[in] n_mels Number of mel filterbanks.
|
||||
/// \param[in] sample_rate Sample rate of the audio waveform.
|
||||
/// \param[in] norm Norm to use, can be NormType::kNone or NormType::kSlaney (Default: NormType::kNone).
|
||||
/// \param[in] mel_type Scale to use, can be MelType::kHtk or MelType::kSlaney (Default: MelType::kHtk).
|
||||
/// \return Status code.
|
||||
Status MS_API MelscaleFbanks(MSTensor *output, int32_t n_freqs, float f_min, float f_max, int32_t n_mels,
|
||||
int32_t sample_rate, NormType norm = NormType::kNone, MelType mel_type = MelType::kHtk);
|
||||
|
||||
/// \brief MuLawDecoding TensorTransform.
|
||||
/// \note Decode mu-law encoded signal.
|
||||
class MS_API MuLawDecoding final : public TensorTransform {
|
||||
|
|
|
@ -84,6 +84,12 @@ enum class MS_API NormMode {
|
|||
kOrtho = 1 ///< Ortho type norm.
|
||||
};
|
||||
|
||||
/// \brief Possible options for norm in MelscaleFbanks.
|
||||
enum class MS_API NormType {
|
||||
kNone = 0, ///< None type norm.
|
||||
kSlaney = 1, ///< Slaney type norm.
|
||||
};
|
||||
|
||||
/// \brief The mode for manual offload.
|
||||
enum class MS_API ManualOffloadMode {
|
||||
kUnspecified, ///< Not set, will use auto_offload setting instead.
|
||||
|
@ -98,6 +104,12 @@ enum class MS_API MapTargetDevice {
|
|||
kAscend310 ///< Ascend310 Device.
|
||||
};
|
||||
|
||||
/// \brief Possible options for mel_type in MelscaleFbanks.
|
||||
enum class MS_API MelType {
|
||||
kHtk = 0, ///< Htk scale type.
|
||||
kSlaney = 1, ///< Slaney scale type.
|
||||
};
|
||||
|
||||
/// \brief The initial type of tensor implementation.
|
||||
enum class MS_API TensorImpl {
|
||||
kNone, ///< None type tensor.
|
||||
|
|
|
@ -18,7 +18,8 @@ Enum for audio ops.
|
|||
from enum import Enum
|
||||
|
||||
import mindspore._c_dataengine as cde
|
||||
|
||||
from mindspore.dataset.core.validator_helpers import check_non_negative_float32, check_non_negative_int32, check_pos_float32, check_pos_int32, \
|
||||
type_check
|
||||
|
||||
class DensityFunction(str, Enum):
|
||||
"""
|
||||
|
@ -110,6 +111,84 @@ class ScaleType(str, Enum):
|
|||
MAGNITUDE: str = "magnitude"
|
||||
|
||||
|
||||
class NormType(str, Enum):
    """
    Normalization options for the mel filterbank.

    Possible enumeration values are: NormType.NONE, NormType.SLANEY.

    - NormType.NONE: do not normalize the data.
    - NormType.SLANEY: normalize the data with slaney.
    """
    NONE: str = "none"
    SLANEY: str = "slaney"
|
||||
|
||||
|
||||
# Maps each Python NormType member to the matching value of the C++ binding enum.
DE_C_NORMTYPE_TYPE = {
    NormType.NONE: cde.NormType.DE_NORMTYPE_NONE,
    NormType.SLANEY: cde.NormType.DE_NORMTYPE_SLANEY,
}
|
||||
|
||||
|
||||
class MelType(str, Enum):
    """
    Mel Types.

    Possible enumeration values are: MelType.HTK, MelType.SLANEY.

    - MelType.HTK: scale the input data with htk.
    - MelType.SLANEY: scale the input data with slaney.
    """
    HTK: str = "htk"
    SLANEY: str = "slaney"
|
||||
|
||||
|
||||
# Maps each Python MelType member to the matching value of the C++ binding enum.
DE_C_MELTYPE_TYPE = {
    MelType.HTK: cde.MelType.DE_MELTYPE_HTK,
    MelType.SLANEY: cde.MelType.DE_MELTYPE_SLANEY,
}
|
||||
|
||||
|
||||
def melscale_fbanks(n_freqs, f_min, f_max, n_mels, sample_rate, norm=NormType.NONE, mel_type=MelType.HTK):
    """
    Create a frequency transformation matrix with shape (n_freqs, n_mels).

    Args:
        n_freqs (int): Number of frequency, must be greater than 0.
        f_min (float): Minimum of frequency in Hz, must be non-negative.
        f_max (float): Maximum of frequency in Hz, must be greater than f_min.
        n_mels (int): Number of mel filterbanks, must be greater than 0.
        sample_rate (int): Sample rate, must be greater than 0.
        norm (NormType, optional): Norm to use, can be NormType.NONE or NormType.SLANEY (Default: NormType.NONE).
        mel_type (MelType, optional): Scale to use, can be MelType.HTK or MelType.SLANEY (Default: MelType.HTK).

    Returns:
        numpy.ndarray, the frequency transformation matrix.

    Raises:
        TypeError: If an argument has an unsupported type.
        ValueError: If an argument is out of range, or f_min is not less than f_max.

    Examples:
        >>> melscale_fbanks = audio.melscale_fbanks(n_freqs=4096, f_min=0, f_max=8000, n_mels=40, sample_rate=16000)
    """

    type_check(n_freqs, (int,), "n_freqs")
    # The C++ API rejects n_freqs == 0, so require a strictly positive value here as well.
    check_pos_int32(n_freqs, "n_freqs")

    type_check(f_min, (int, float,), "f_min")
    check_non_negative_float32(f_min, "f_min")

    type_check(f_max, (int, float,), "f_max")
    check_pos_float32(f_max, "f_max")
    # An empty frequency range (f_min == f_max) yields zero-width filters, so it is rejected like f_min > f_max.
    if f_min >= f_max:
        raise ValueError(
            "Input f_min should be less than f_max, but got f_min: {0} and f_max: {1}.".format(f_min, f_max))

    type_check(n_mels, (int,), "n_mels")
    check_pos_int32(n_mels, "n_mels")

    type_check(sample_rate, (int,), "sample_rate")
    check_pos_int32(sample_rate, "sample_rate")

    type_check(norm, (NormType,), "norm")
    type_check(mel_type, (MelType,), "mel_type")
    return cde.MelscaleFbanks(n_freqs, f_min, f_max, n_mels, sample_rate, DE_C_NORMTYPE_TYPE[norm],
                              DE_C_MELTYPE_TYPE[mel_type]).as_array()
|
||||
|
||||
|
||||
class NormMode(str, Enum):
|
||||
"""
|
||||
Norm Types.
|
||||
|
|
|
@ -936,6 +936,34 @@ TEST_F(MindDataTestPipeline, TestHighpassBiquadWrongArgs) {
|
|||
EXPECT_EQ(iter02, nullptr);
|
||||
}
|
||||
|
||||
/// Feature: MelscaleFbanks.
|
||||
/// Description: Test normal operation.
|
||||
/// Expectation: As expected.
|
||||
TEST_F(MindDataTestPipeline, TestMelscaleFbanksNormal) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-MelscaleFbanksNormal.";
|
||||
mindspore::MSTensor output;
|
||||
NormType norm = NormType::kSlaney;
|
||||
MelType mel_type = MelType::kHtk;
|
||||
Status s01 = audio::MelscaleFbanks(&output, 1024, 0, 1000, 40, 16000, norm, mel_type);
|
||||
EXPECT_TRUE(s01.IsOk());
|
||||
}
|
||||
|
||||
/// Feature: MelscaleFbanks.
|
||||
/// Description: Test operation with invalid input.
|
||||
/// Expectation: Throw exception as expected.
|
||||
TEST_F(MindDataTestPipeline, TestMelscaleFbanksWithInvalidInput) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMelscaleFbanksWithInvalidInput.";
|
||||
mindspore::MSTensor output;
|
||||
MS_LOG(INFO) << "n_freqs is too low.";
|
||||
NormType norm = NormType::kNone;
|
||||
MelType mel_type = MelType::kHtk;
|
||||
Status s01 = audio::MelscaleFbanks(&output, 1, 50, 1000, 20, 16000, norm, mel_type);
|
||||
EXPECT_FALSE(s01.IsOk());
|
||||
MS_LOG(INFO) << "n_mels is too high.";
|
||||
Status s02 = audio::MelscaleFbanks(&output, 100, 50, 1000, 40, 16000, norm, mel_type);
|
||||
EXPECT_FALSE(s02.IsOk());
|
||||
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, TestMuLawDecodingBasic) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMuLawDecodingBasic.";
|
||||
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
# Copyright 2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.dataset.audio.utils as audio
|
||||
from mindspore import log as logger
|
||||
|
||||
|
||||
def count_unequal_element(data_expected, data_me, rtol, atol):
    """
    Assert that two arrays of the same shape agree within tolerance.

    An element counts as a mismatch when |expected - actual| > atol + |expected| * rtol.
    The check fails when the fraction of mismatching elements is not below rtol.

    Args:
        data_expected (numpy.ndarray): Reference values.
        data_me (numpy.ndarray): Values under test.
        rtol (float): Relative tolerance, also used as the allowed mismatch fraction.
        atol (float): Absolute tolerance.

    Raises:
        AssertionError: If the shapes differ or too many elements mismatch.
    """
    assert data_expected.shape == data_me.shape
    total_count = data_expected.size
    if total_count == 0:
        # Nothing to compare; returning here avoids a division by zero below.
        return
    error = np.abs(data_expected - data_me)
    greater = np.greater(error, atol + np.abs(data_expected) * rtol)
    loss_count = np.count_nonzero(greater)
    assert (loss_count / total_count) < rtol, \
        "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
        format(data_expected[greater], data_me[greater], error[greater])
|
||||
|
||||
|
||||
def test_melscale_fbanks_normal():
    """
    Feature: melscale_fbanks.
    Description: Test normal operation with NormType.NONE and MelType.HTK.
    Expectation: The output data is the same as the result of torchaudio.functional.melscale_fbanks.
    """
    actual = audio.melscale_fbanks(8, 2, 50, 4, 100, audio.NormType.NONE, audio.MelType.HTK)
    reference_rows = [
        [0.0000, 0.0000, 0.0000, 0.0000],
        [0.5502, 0.0000, 0.0000, 0.0000],
        [0.6898, 0.3102, 0.0000, 0.0000],
        [0.0000, 0.9366, 0.0634, 0.0000],
        [0.0000, 0.1924, 0.8076, 0.0000],
        [0.0000, 0.0000, 0.4555, 0.5445],
        [0.0000, 0.0000, 0.0000, 0.7247],
        [0.0000, 0.0000, 0.0000, 0.0000],
    ]
    reference = np.array(reference_rows, dtype=np.float64)
    count_unequal_element(reference, actual, 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_melscale_fbanks_none_slaney():
    """
    Feature: melscale_fbanks.
    Description: Test normal operation with NormType.NONE and MelType.SLANEY.
    Expectation: The output data is the same as the result of torchaudio.functional.melscale_fbanks.
    """
    actual = audio.melscale_fbanks(8, 2, 50, 4, 100, audio.NormType.NONE, audio.MelType.SLANEY)
    reference_rows = [
        [0.0000, 0.0000, 0.0000, 0.0000],
        [0.5357, 0.0000, 0.0000, 0.0000],
        [0.7202, 0.2798, 0.0000, 0.0000],
        [0.0000, 0.9762, 0.0238, 0.0000],
        [0.0000, 0.2321, 0.7679, 0.0000],
        [0.0000, 0.0000, 0.4881, 0.5119],
        [0.0000, 0.0000, 0.0000, 0.7440],
        [0.0000, 0.0000, 0.0000, 0.0000],
    ]
    reference = np.array(reference_rows, dtype=np.float64)
    count_unequal_element(reference, actual, 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_melscale_fbanks_with_slaney_htk():
    """
    Feature: melscale_fbanks.
    Description: Test normal operation with NormType.SLANEY and MelType.HTK.
    Expectation: The output data is the same as the result of torchaudio.functional.melscale_fbanks.
    """
    actual = audio.melscale_fbanks(10, 0, 50, 5, 100, audio.NormType.SLANEY, audio.MelType.HTK)
    reference_rows = [
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0843, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0776, 0.0447, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.1158, 0.0055, 0.0000, 0.0000],
        [0.0000, 0.0344, 0.0860, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0741, 0.0454, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.1133, 0.0053],
        [0.0000, 0.0000, 0.0000, 0.0355, 0.0822],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0760],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
    ]
    reference = np.array(reference_rows, dtype=np.float64)
    count_unequal_element(reference, actual, 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_melscale_fbanks_with_slaney_slaney():
    """
    Feature: melscale_fbanks.
    Description: Test normal operation with NormType.SLANEY and MelType.SLANEY.
    Expectation: The output data is the same as the result of torchaudio.functional.melscale_fbanks.
    """
    actual = audio.melscale_fbanks(8, 2, 50, 4, 100, audio.NormType.SLANEY, audio.MelType.SLANEY)
    reference_rows = [
        [0.0000, 0.0000, 0.0000, 0.0000],
        [0.0558, 0.0000, 0.0000, 0.0000],
        [0.0750, 0.0291, 0.0000, 0.0000],
        [0.0000, 0.1017, 0.0025, 0.0000],
        [0.0000, 0.0242, 0.0800, 0.0000],
        [0.0000, 0.0000, 0.0508, 0.0533],
        [0.0000, 0.0000, 0.0000, 0.0775],
        [0.0000, 0.0000, 0.0000, 0.0000],
    ]
    reference = np.array(reference_rows, dtype=np.float64)
    count_unequal_element(reference, actual, 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_melscale_fbanks_invalid_input():
    """
    Feature: melscale_fbanks.
    Description: Test operation with invalid input.
    Expectation: Throw exception as expected.
    """

    def test_invalid_input(test_name, n_freqs, f_min, f_max, n_mels, sample_rate, norm, mel_type, error, error_msg):
        # Run one bad-argument case and check both the exception type and that the message names the argument.
        logger.info("Test melscale_fbanks with bad input: {0}".format(test_name))
        with pytest.raises(error) as error_info:
            audio.melscale_fbanks(n_freqs, f_min, f_max, n_mels, sample_rate, norm, mel_type)
        logger.info(error_info)
        assert error_msg in str(error_info.value)

    test_invalid_input("invalid n_freqs parameter Value", 99999999999, 0, 50, 5, 100, audio.NormType.NONE,
                       audio.MelType.HTK, ValueError, "n_freqs")
    test_invalid_input("invalid n_freqs parameter type", 10.5, 0, 50, 5, 100, audio.NormType.NONE, audio.MelType.HTK,
                       TypeError, "n_freqs")
    test_invalid_input("invalid f_min parameter type", 10, None, 50, 5, 100, audio.NormType.NONE, audio.MelType.HTK,
                       TypeError, "f_min")
    test_invalid_input("invalid f_max parameter type", 10, 0, None, 5, 100, audio.NormType.NONE, audio.MelType.HTK,
                       TypeError, "f_max")
    test_invalid_input("invalid n_mels parameter type", 10, 0, 50, 10.1, 100, audio.NormType.NONE, audio.MelType.HTK,
                       TypeError, "n_mels")
    test_invalid_input("invalid n_mels parameter Value", 20, 0, 50, 999999999999, 100, audio.NormType.NONE,
                       audio.MelType.HTK, ValueError, "n_mels")
    test_invalid_input("invalid sample_rate parameter type", 10, 0, 50, 5, 100.1, audio.NormType.NONE,
                       audio.MelType.HTK, TypeError, "sample_rate")
    test_invalid_input("invalid sample_rate parameter Value", 20, 0, 50, 5, 999999999999, audio.NormType.NONE,
                       audio.MelType.HTK, ValueError, "sample_rate")
    test_invalid_input("invalid norm parameter type", 10, 0, 50, 5, 100, None, audio.MelType.HTK,
                       TypeError, "norm")
    # This case passes a bad mel_type, so it is labelled accordingly in the log.
    test_invalid_input("invalid mel_type parameter type", 10, 0, 50, 5, 100, audio.NormType.SLANEY, None,
                       TypeError, "mel_type")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test in this file, in declaration order, when executed as a script.
    for run_test in (test_melscale_fbanks_normal,
                     test_melscale_fbanks_none_slaney,
                     test_melscale_fbanks_with_slaney_htk,
                     test_melscale_fbanks_with_slaney_slaney,
                     test_melscale_fbanks_invalid_input):
        run_test()
|
Loading…
Reference in New Issue