forked from mindspore-Ecosystem/mindspore
[feat][assistant][I3CEGI] add op vol
This commit is contained in:
parent
0fb2337d2e
commit
9e62623b4a
|
@ -37,6 +37,7 @@
|
|||
#include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/time_masking_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/vol_ir.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
@ -360,6 +361,19 @@ TimeStretch::TimeStretch(float hop_length, int n_freq, float fixed_rate)
|
|||
std::shared_ptr<TensorOperation> TimeStretch::Parse() {
|
||||
return std::make_shared<TimeStretchOperation>(data_->hop_length_, data_->n_freq_, data_->fixed_rate_);
|
||||
}
|
||||
|
||||
// Vol Transform Operation.
|
||||
struct Vol::Data {
|
||||
Data(float gain, GainType gain_type) : gain_(gain), gain_type_(gain_type) {}
|
||||
float gain_;
|
||||
GainType gain_type_;
|
||||
};
|
||||
|
||||
Vol::Vol(float gain, GainType gain_type) : data_(std::make_shared<Data>(gain, gain_type)) {}
|
||||
|
||||
std::shared_ptr<TensorOperation> Vol::Parse() {
|
||||
return std::make_shared<VolOperation>(data_->gain_, data_->gain_type_);
|
||||
}
|
||||
} // namespace audio
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "minddata/dataset/audio/ir/kernels/mu_law_decoding_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/time_masking_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h"
|
||||
#include "minddata/dataset/audio/ir/kernels/vol_ir.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
@ -292,5 +293,22 @@ PYBIND_REGISTER(
|
|||
}));
|
||||
}));
|
||||
|
||||
PYBIND_REGISTER(VolOperation, 1, ([](const py::module *m) {
|
||||
(void)py::class_<audio::VolOperation, TensorOperation, std::shared_ptr<audio::VolOperation>>(
|
||||
*m, "VolOperation")
|
||||
.def(py::init([](float gain, GainType gain_type) {
|
||||
auto vol = std::make_shared<audio::VolOperation>(gain, gain_type);
|
||||
THROW_IF_ERROR(vol->ValidateParams());
|
||||
return vol;
|
||||
}));
|
||||
}));
|
||||
|
||||
PYBIND_REGISTER(GainType, 0, ([](const py::module *m) {
|
||||
(void)py::enum_<GainType>(*m, "GainType", py::arithmetic())
|
||||
.value("DE_GAINTYPE_AMPLITUDE", GainType::kAmplitude)
|
||||
.value("DE_GAINTYPE_POWER", GainType::kPower)
|
||||
.value("DE_GAINTYPE_DB", GainType::kDb)
|
||||
.export_values();
|
||||
}));
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -23,5 +23,6 @@ add_library(audio-ir-kernels OBJECT
|
|||
mu_law_decoding_ir.cc
|
||||
time_masking_ir.cc
|
||||
time_stretch_ir.cc
|
||||
vol_ir.cc
|
||||
)
|
||||
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "minddata/dataset/audio/ir/kernels/vol_ir.h"
|
||||
|
||||
#include "minddata/dataset/audio/kernels/vol_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace audio {
|
||||
|
||||
// Vol
|
||||
VolOperation::VolOperation(float gain, GainType gain_type) : gain_(gain), gain_type_(gain_type) {}
|
||||
|
||||
VolOperation::~VolOperation() = default;
|
||||
|
||||
std::string VolOperation::Name() const { return kVolOperation; }
|
||||
|
||||
Status VolOperation::ValidateParams() {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(
|
||||
!(gain_type_ == GainType::kPower && gain_ <= 0),
|
||||
"Vol: gain must be greater than 0 when gain_type is Power, but got: " + std::to_string(gain_));
|
||||
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(
|
||||
!(gain_type_ == GainType::kAmplitude && gain_ < 0),
|
||||
"Vol: gain must be greater than or equal to 0 when gain_type is Amplitude, but got: " + std::to_string(gain_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> VolOperation::Build() {
|
||||
std::shared_ptr<VolOp> tensor_op = std::make_shared<VolOp>(gain_, gain_type_);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
Status VolOperation::to_json(nlohmann::json *out_json) {
|
||||
nlohmann::json args;
|
||||
args["gain"] = gain_;
|
||||
args["gain_type"] = gain_type_;
|
||||
*out_json = args;
|
||||
return Status::OK();
|
||||
}
|
||||
} // namespace audio
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_VOL_IR_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_VOL_IR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "include/api/status.h"
|
||||
#include "minddata/dataset/include/dataset/constants.h"
|
||||
#include "minddata/dataset/kernels/ir/tensor_operation.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
namespace audio {
|
||||
|
||||
constexpr char kVolOperation[] = "Vol";
|
||||
|
||||
class VolOperation : public TensorOperation {
|
||||
public:
|
||||
VolOperation(float gain, GainType gain_type);
|
||||
|
||||
~VolOperation();
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override;
|
||||
|
||||
Status to_json(nlohmann::json *out_json) override;
|
||||
|
||||
private:
|
||||
float gain_;
|
||||
GainType gain_type_;
|
||||
};
|
||||
|
||||
} // namespace audio
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_VOL_IR_H_
|
|
@ -24,5 +24,6 @@ add_library(audio-kernels OBJECT
|
|||
mu_law_decoding_op.cc
|
||||
time_masking_op.cc
|
||||
time_stretch_op.cc
|
||||
vol_op.cc
|
||||
)
|
||||
|
||||
|
|
|
@ -272,11 +272,11 @@ Status LFilter(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *ou
|
|||
}
|
||||
|
||||
/// \brief Stretch STFT in time at a given rate, without changing the pitch.
|
||||
/// \param[in] input - Tensor of shape <..., freq, time>.
|
||||
/// \param[in] rate - Stretch factor.
|
||||
/// \param[in] phase_advance - Expected phase advance in each bin.
|
||||
/// \param[out] output - Tensor after stretch in time domain.
|
||||
/// \return Status return code.
|
||||
/// \param input: Tensor of shape <..., freq, time>.
|
||||
/// \param rate: Stretch factor.
|
||||
/// \param phase_advance: Expected phase advance in each bin.
|
||||
/// \param output: Tensor after stretch in time domain.
|
||||
/// \return Status code.
|
||||
Status TimeStretch(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, float rate, float hop_length,
|
||||
float n_freq);
|
||||
|
||||
|
@ -325,6 +325,46 @@ Status MuLawDecoding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso
|
|||
/// \param[in] fade_shape: Shape of fade.
|
||||
Status Fade(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t fade_in_len,
|
||||
int32_t fade_out_len, FadeShape fade_shape);
|
||||
|
||||
/// \brief Add a volume to an waveform.
|
||||
/// \param input/output: Tensor of shape <..., time>.
|
||||
/// \param gain: Gain value, varies according to the value of gain_type.
|
||||
/// \param gain_type: Type of gain, should be one of [GainType::kAmplitude, GainType::kDb, GainType::kPower].
|
||||
/// \return Status code.
|
||||
template <typename T>
|
||||
Status Vol(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T gain, GainType gain_type) {
|
||||
const T lower_bound = -1;
|
||||
const T upper_bound = 1;
|
||||
|
||||
// DB is a unit which converts a numeric value into decibel scale and for conversion, we have to use log10
|
||||
// A(in dB) = 20log10(A in amplitude)
|
||||
// When referring to measurements of power quantities, a ratio can be expressed as a level in decibels by evaluating
|
||||
// ten times the base-10 logarithm of the ratio of the measured quantity to reference value
|
||||
// A(in dB) = 10log10(A in power)
|
||||
const int power_factor_div = 20;
|
||||
const int power_factor_mul = 10;
|
||||
const int base = 10;
|
||||
|
||||
if (gain_type == GainType::kDb) {
|
||||
if (gain != 0) {
|
||||
gain = std::pow(base, (gain / power_factor_div));
|
||||
}
|
||||
} else if (gain_type == GainType::kPower) {
|
||||
gain = power_factor_mul * std::log10(gain);
|
||||
gain = std::pow(base, (gain / power_factor_div));
|
||||
}
|
||||
|
||||
for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
|
||||
if (gain != 0 || gain_type == GainType::kAmplitude) {
|
||||
*itr = (*itr) * gain;
|
||||
}
|
||||
*itr = std::min(std::max((*itr), lower_bound), upper_bound);
|
||||
}
|
||||
|
||||
*output = input;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "minddata/dataset/audio/kernels/vol_op.h"
|
||||
|
||||
#include "minddata/dataset/audio/kernels/audio_utils.h"
|
||||
#include "minddata/dataset/kernels/data/data_utils.h"
|
||||
#include "minddata/dataset/util/status.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
Status VolOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
|
||||
IO_CHECK(input, output);
|
||||
std::shared_ptr<Tensor> input_tensor;
|
||||
TensorShape input_shape = input->shape();
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "Vol: input tensor is not in shape of <..., time>.");
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(
|
||||
input->type().IsNumeric(),
|
||||
"Vol: input tensor type should be int, float or double, but got: " + input->type().ToString());
|
||||
if (input->type() != DataType::DE_FLOAT64) {
|
||||
RETURN_IF_NOT_OK(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32)));
|
||||
return Vol(input_tensor, output, gain_, gain_type_);
|
||||
} else {
|
||||
input_tensor = input;
|
||||
return Vol(input_tensor, output, static_cast<double>(gain_), gain_type_);
|
||||
}
|
||||
}
|
||||
|
||||
Status VolOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) {
|
||||
RETURN_IF_NOT_OK(TensorOp::OutputType(inputs, outputs));
|
||||
if (!inputs[0].IsNumeric()) {
|
||||
RETURN_STATUS_UNEXPECTED("Vol: input tensor type should be int, float or double, but got: " + inputs[0].ToString());
|
||||
} else if (inputs[0] == DataType(DataType::DE_FLOAT64)) {
|
||||
outputs[0] = DataType(DataType::DE_FLOAT64);
|
||||
} else {
|
||||
outputs[0] = DataType(DataType::DE_FLOAT32);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,47 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_VOL_OP_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_VOL_OP_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "minddata/dataset/include/dataset/constants.h"
|
||||
#include "minddata/dataset/kernels/tensor_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
class VolOp : public TensorOp {
|
||||
public:
|
||||
explicit VolOp(float gain, GainType gain_type = GainType::kAmplitude) : gain_(gain), gain_type_(gain_type) {}
|
||||
|
||||
~VolOp() override = default;
|
||||
|
||||
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
|
||||
|
||||
std::string Name() const override { return kVolOp; }
|
||||
|
||||
Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
|
||||
|
||||
private:
|
||||
float gain_;
|
||||
GainType gain_type_;
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_VOL_OP_H_
|
|
@ -515,6 +515,29 @@ class TimeStretch final : public TensorTransform {
|
|||
std::shared_ptr<Data> data_;
|
||||
};
|
||||
|
||||
/// \brief Vol TensorTransform.
|
||||
/// \notes Add a volume to an waveform.
|
||||
class Vol final : public TensorTransform {
|
||||
public:
|
||||
/// \brief Constructor.
|
||||
/// \param[in] gain Gain value, varies according to the value of gain_type. If gain_type is GainType::kAmplitude,
|
||||
/// gain must be greater than or equal to zero. If gain_type is GainType::kPower, gain must be greater than zero.
|
||||
/// If gain_type is GainType::kDb, there is no limit for gain.
|
||||
/// \param[in] gain_type Type of gain, should be one of [GainType::kAmplitude, GainType::kDb, GainType::kPower].
|
||||
explicit Vol(float gain, GainType gain_type = GainType::kAmplitude);
|
||||
|
||||
/// \brief Destructor.
|
||||
~Vol() = default;
|
||||
|
||||
protected:
|
||||
/// \brief Function to convert TensorTransform object into a TensorOperation object.
|
||||
/// \return Shared pointer to TensorOperation object.
|
||||
std::shared_ptr<TensorOperation> Parse() override;
|
||||
|
||||
private:
|
||||
struct Data;
|
||||
std::shared_ptr<Data> data_;
|
||||
};
|
||||
} // namespace audio
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -79,6 +79,13 @@ enum class ScaleType {
|
|||
kPower = 1, ///< Audio scale is power.
|
||||
};
|
||||
|
||||
/// \brief The scale for gain type.
|
||||
enum class GainType {
|
||||
kAmplitude = 0, ///< Audio gain type is amplitude.
|
||||
kPower = 1, ///< Audio gain type is power.
|
||||
kDb = 2, ///< Audio gain type is db.
|
||||
};
|
||||
|
||||
/// \brief The method of padding.
|
||||
enum class BorderType {
|
||||
kConstant = 0, ///< Fill the border with constant values.
|
||||
|
|
|
@ -160,6 +160,7 @@ constexpr char kLowpassBiquadOp[] = "LowpassBiquadOp";
|
|||
constexpr char kMuLawDecodingOp[] = "MuLawDecodingOp";
|
||||
constexpr char kTimeMaskingOp[] = "TimeMaskingOp";
|
||||
constexpr char kTimeStretchOp[] = "TimeStretchOp";
|
||||
constexpr char kVolOp[] = "VolOp";
|
||||
|
||||
// data
|
||||
constexpr char kConcatenateOp[] = "ConcatenateOp";
|
||||
|
|
|
@ -23,11 +23,11 @@ import numpy as np
|
|||
|
||||
import mindspore._c_dataengine as cde
|
||||
from ..transforms.c_transforms import TensorOperation
|
||||
from .utils import FadeShape, ScaleType
|
||||
from .utils import FadeShape, GainType, ScaleType
|
||||
from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \
|
||||
check_bandreject_biquad, check_bass_biquad, check_biquad, check_complex_norm, check_contrast, check_dc_shift, \
|
||||
check_deemph_biquad, check_equalizer_biquad, check_fade, check_highpass_biquad, check_lfilter, \
|
||||
check_lowpass_biquad, check_masking, check_mu_law_decoding, check_time_stretch
|
||||
check_lowpass_biquad, check_masking, check_mu_law_decoding, check_time_stretch, check_vol
|
||||
|
||||
|
||||
class AudioTensorOperation(TensorOperation):
|
||||
|
@ -644,15 +644,12 @@ class TimeStretch(AudioTensorOperation):
|
|||
fixed_rate (float, optional): Rate to speed up or slow down the input in time (default=None).
|
||||
|
||||
Examples:
|
||||
>>> freq = 44100
|
||||
>>> num_frame = 30
|
||||
>>> def gen():
|
||||
... np.random.seed(0)
|
||||
... data = np.random.random([freq, num_frame])
|
||||
... yield (np.array(data, dtype=np.float32), )
|
||||
>>> data1 = ds.GeneratorDataset(source=gen, column_names=["multi_dimensional_data"])
|
||||
>>> transforms = [py_audio.TimeStretch()]
|
||||
>>> data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
|
||||
>>> import numpy as np
|
||||
>>>
|
||||
>>> waveform = np.random.random([1, 30])
|
||||
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
||||
>>> transforms = [audio.TimeStretch()]
|
||||
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
||||
"""
|
||||
@check_time_stretch
|
||||
def __init__(self, hop_length=None, n_freq=201, fixed_rate=None):
|
||||
|
@ -665,3 +662,38 @@ class TimeStretch(AudioTensorOperation):
|
|||
|
||||
def parse(self):
|
||||
return cde.TimeStretchOperation(self.hop_length, self.n_freq, self.fixed_rate)
|
||||
|
||||
|
||||
DE_C_GAINTYPE_TYPE = {GainType.AMPLITUDE: cde.GainType.DE_GAINTYPE_AMPLITUDE,
|
||||
GainType.POWER: cde.GainType.DE_GAINTYPE_POWER,
|
||||
GainType.DB: cde.GainType.DE_GAINTYPE_DB}
|
||||
|
||||
|
||||
class Vol(AudioTensorOperation):
|
||||
"""
|
||||
Apply amplification or attenuation to the whole waveform.
|
||||
|
||||
Args:
|
||||
gain (float): Value of gain adjustment.
|
||||
If gain_type = amplitude, gain stands for nonnegative amplitude ratio.
|
||||
If gain_type = power, gain stands for power.
|
||||
If gain_type = db, gain stands for decibels.
|
||||
gain_type (ScaleType, optional): Type of gain, contains the following three enumeration values
|
||||
GainType.AMPLITUDE, GainType.POWER and GainType.DB (default=GainType.AMPLITUDE).
|
||||
|
||||
Examples:
|
||||
>>> import numpy as np
|
||||
>>>
|
||||
>>> waveform = np.random.random([20, 30])
|
||||
>>> numpy_slices_dataset = ds.NumpySlicesDataset(data=waveform, column_names=["audio"])
|
||||
>>> transforms = [audio.Vol(gain=10, gain_type=GainType.DB)]
|
||||
>>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
|
||||
"""
|
||||
|
||||
@check_vol
|
||||
def __init__(self, gain, gain_type=GainType.AMPLITUDE):
|
||||
self.gain = gain
|
||||
self.gain_type = gain_type
|
||||
|
||||
def parse(self):
|
||||
return cde.VolOperation(self.gain, DE_C_GAINTYPE_TYPE[self.gain_type])
|
||||
|
|
|
@ -19,12 +19,6 @@ enum for audio ops
|
|||
from enum import Enum
|
||||
|
||||
|
||||
class ScaleType(str, Enum):
|
||||
"""Scale Type"""
|
||||
POWER: str = "power"
|
||||
MAGNITUDE: str = "magnitude"
|
||||
|
||||
|
||||
class FadeShape(str, Enum):
|
||||
"""Fade Shape"""
|
||||
LINEAR: str = "linear"
|
||||
|
@ -32,3 +26,16 @@ class FadeShape(str, Enum):
|
|||
LOGARITHMIC: str = "logarithmic"
|
||||
QUARTERSINE: str = "quarter_sine"
|
||||
HALFSINE: str = "half_sine"
|
||||
|
||||
|
||||
class GainType(str, Enum):
|
||||
"""Gain Type"""
|
||||
POWER: str = "power"
|
||||
AMPLITUDE: str = "amplitude"
|
||||
DB: str = "db"
|
||||
|
||||
|
||||
class ScaleType(str, Enum):
|
||||
"""Scale Type"""
|
||||
POWER: str = "power"
|
||||
MAGNITUDE: str = "magnitude"
|
||||
|
|
|
@ -21,7 +21,7 @@ from functools import wraps
|
|||
from mindspore.dataset.core.validator_helpers import check_float32, check_float32_not_zero, check_int32_not_zero, \
|
||||
check_list_same_size, check_non_negative_float32, check_non_negative_int32, check_pos_float32, check_pos_int32, \
|
||||
check_value, parse_user_args, type_check
|
||||
from .utils import FadeShape, ScaleType
|
||||
from .utils import FadeShape, GainType, ScaleType
|
||||
|
||||
|
||||
def check_amplitude_to_db(method):
|
||||
|
@ -384,3 +384,24 @@ def check_fade(method):
|
|||
return method(self, *args, **kwargs)
|
||||
|
||||
return new_method
|
||||
|
||||
|
||||
def check_vol(method):
|
||||
"""Wrapper method to check the parameters of Vol."""
|
||||
|
||||
@wraps(method)
|
||||
def new_method(self, *args, **kwargs):
|
||||
[gain, gain_type], _ = parse_user_args(method, *args, **kwargs)
|
||||
# type check gain
|
||||
type_check(gain, (int, float), "gain")
|
||||
# type check gain_type and value check gain
|
||||
type_check(gain_type, (GainType,), "gain_type")
|
||||
if gain_type == GainType.AMPLITUDE:
|
||||
check_non_negative_float32(gain, "gain")
|
||||
elif gain_type == GainType.POWER:
|
||||
check_pos_float32(gain, "gain")
|
||||
else:
|
||||
check_float32(gain, "gain")
|
||||
return method(self, *args, **kwargs)
|
||||
|
||||
return new_method
|
||||
|
|
|
@ -44,7 +44,7 @@ TEST_F(MindDataTestPipeline, TestTimeMaskingPipeline) {
|
|||
ds = ds->Map({timemasking});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Filtered waveform by bandbiquad
|
||||
// mask waveform
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
|
@ -83,7 +83,6 @@ TEST_F(MindDataTestPipeline, TestTimeMaskingWrongArgs) {
|
|||
ds = ds->Map({timemasking});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
// Filtered waveform by bandbiquad
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
// Expect failure
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
|
@ -156,3 +155,62 @@ TEST_F(MindDataTestPipeline, TestTimeStretchPipelineWrongArgs) {
|
|||
// Expect failure
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, TestVolPipeline) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVolPipeline.";
|
||||
// Original waveform
|
||||
std::shared_ptr<SchemaObj> schema = Schema();
|
||||
ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
|
||||
std::shared_ptr<Dataset> ds = RandomData(50, schema);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
ds = ds->SetNumWorkers(4);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
auto vol = audio::Vol(0.3);
|
||||
|
||||
ds = ds->Map({vol});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
std::unordered_map<std::string, mindspore::MSTensor> row;
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
|
||||
std::vector<int64_t> expected = {2, 200};
|
||||
|
||||
int i = 0;
|
||||
while (row.size() != 0) {
|
||||
auto col = row["inputData"];
|
||||
ASSERT_EQ(col.Shape(), expected);
|
||||
ASSERT_EQ(col.Shape().size(), 2);
|
||||
ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
|
||||
ASSERT_OK(iter->GetNextRow(&row));
|
||||
i++;
|
||||
}
|
||||
EXPECT_EQ(i, 50);
|
||||
|
||||
iter->Stop();
|
||||
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, TestVolWrongArgs) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVolWrongArgs.";
|
||||
// Original waveform
|
||||
std::shared_ptr<SchemaObj> schema = Schema();
|
||||
ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
|
||||
std::shared_ptr<Dataset> ds = RandomData(50, schema);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
ds = ds->SetNumWorkers(4);
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
auto vol_op = audio::Vol(-1.5, GainType::kPower);
|
||||
|
||||
ds = ds->Map({vol_op});
|
||||
EXPECT_NE(ds, nullptr);
|
||||
|
||||
std::shared_ptr<Iterator> iter = ds->CreateIterator();
|
||||
// Expect failure
|
||||
EXPECT_EQ(iter, nullptr);
|
||||
}
|
||||
|
|
|
@ -1056,4 +1056,29 @@ TEST_F(MindDataTestExecute, TestFadeWithInvalidArg) {
|
|||
mindspore::dataset::Execute Transform04({fade4});
|
||||
Status s04 = Transform04(input_04, &input_04);
|
||||
EXPECT_FALSE(s04.IsOk());
|
||||
}
|
||||
}
|
||||
TEST_F(MindDataTestExecute, TestVolDefalutValue) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestExecute-TestVolDefalutValue.";
|
||||
std::shared_ptr<Tensor> input_tensor_;
|
||||
TensorShape s = TensorShape({2, 6});
|
||||
ASSERT_OK(Tensor::CreateFromVector(
|
||||
std::vector<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), s, &input_tensor_));
|
||||
auto input_tensor = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input_tensor_));
|
||||
std::shared_ptr<TensorTransform> vol_op = std::make_shared<audio::Vol>(0.333);
|
||||
mindspore::dataset::Execute transform({vol_op});
|
||||
Status status = transform(input_tensor, &input_tensor);
|
||||
EXPECT_TRUE(status.IsOk());
|
||||
}
|
||||
|
||||
TEST_F(MindDataTestExecute, TestVolGainTypePower) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestExecute-TestVolGainTypePower.";
|
||||
std::shared_ptr<Tensor> input_tensor_;
|
||||
TensorShape s = TensorShape({4, 3});
|
||||
ASSERT_OK(Tensor::CreateFromVector(
|
||||
std::vector<double>({4.0f, 5.0f, 3.0f, 5.0f, 4.0f, 6.0f, 6.0f, 1.0f, 2.0f, 3.0f, 2.0f, 1.0f}), s, &input_tensor_));
|
||||
auto input_tensor = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input_tensor_));
|
||||
std::shared_ptr<TensorTransform> vol_op = std::make_shared<audio::Vol>(0.2, GainType::kPower);
|
||||
mindspore::dataset::Execute transform({vol_op});
|
||||
Status status = transform(input_tensor, &input_tensor);
|
||||
EXPECT_TRUE(status.IsOk());
|
||||
}
|
||||
|
|
|
@ -0,0 +1,160 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Testing Vol op in DE
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.dataset as ds
|
||||
import mindspore.dataset.audio.transforms as c_audio
|
||||
from mindspore import log as logger
|
||||
from mindspore.dataset.audio import utils
|
||||
|
||||
|
||||
def count_unequal_element(data_expected, data_me, rtol, atol):
|
||||
""" Precision calculation func """
|
||||
assert data_expected.shape == data_me.shape
|
||||
total_count = len(data_expected.flatten())
|
||||
error = np.abs(data_expected - data_me)
|
||||
greater = np.greater(error, atol + np.abs(data_expected) * rtol)
|
||||
loss_count = np.count_nonzero(greater)
|
||||
assert (loss_count / total_count) < rtol, \
|
||||
"\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
|
||||
format(data_expected[greater], data_me[greater], error[greater])
|
||||
|
||||
|
||||
def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True):
|
||||
""" Precision calculation formula """
|
||||
if np.any(np.isnan(data_expected)):
|
||||
assert np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan)
|
||||
elif not np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan):
|
||||
count_unequal_element(data_expected, data_me, rtol, atol)
|
||||
|
||||
|
||||
def test_func_vol_eager():
|
||||
""" mindspore eager mode normal testcase:vol op"""
|
||||
|
||||
logger.info("check vol op output")
|
||||
ndarr_in = np.array([[0.3667, 0.5295, 0.2949, 0.4508, 0.6457, 0.3625, 0.4377, 0.3568],
|
||||
[0.6488, 0.6525, 0.5140, 0.6725, 0.9261, 0.0609, 0.3910, 0.4608],
|
||||
[0.0454, 0.0487, 0.6990, 0.1637, 0.5763, 0.1086, 0.5343, 0.4699],
|
||||
[0.9993, 0.0776, 0.3498, 0.0429, 0.1588, 0.3061, 0.1166, 0.3716],
|
||||
[0.7625, 0.2410, 0.8888, 0.5027, 0.0913, 0.2520, 0.5625, 0.9873]]).astype(np.float32)
|
||||
# cal from benchmark
|
||||
out_expect = np.array([[0.0733, 0.1059, 0.0590, 0.0902, 0.1291, 0.0725, 0.0875, 0.0714],
|
||||
[0.1298, 0.1305, 0.1028, 0.1345, 0.1852, 0.0122, 0.0782, 0.0922],
|
||||
[0.0091, 0.0097, 0.1398, 0.0327, 0.1153, 0.0217, 0.1069, 0.0940],
|
||||
[0.1999, 0.0155, 0.0700, 0.0086, 0.0318, 0.0612, 0.0233, 0.0743],
|
||||
[0.1525, 0.0482, 0.1778, 0.1005, 0.0183, 0.0504, 0.1125, 0.1975]])
|
||||
op = c_audio.Vol(gain=0.2, gain_type=utils.GainType.AMPLITUDE)
|
||||
out_mindspore = op(ndarr_in)
|
||||
allclose_nparray(out_mindspore, out_expect, 0.0001, 0.0001)
|
||||
|
||||
ndarr_in = np.array([[[-0.5794799327850342, 0.19526369869709015],
|
||||
[-0.5935744047164917, 0.2948109209537506],
|
||||
[-0.42077431082725525, 0.04923877865076065]],
|
||||
[[0.5497273802757263, -0.22815021872520447],
|
||||
[-0.05891447141766548, -0.16206198930740356],
|
||||
[-1.4782767295837402, -1.3815662860870361]]]).astype(np.float32)
|
||||
# cal from benchmark
|
||||
out_expect = np.array([[[-0.5761537551879883, 0.1941428929567337],
|
||||
[-0.5901673436164856, 0.2931187152862549],
|
||||
[-0.41835910081863403, 0.04895615205168724]],
|
||||
[[0.5465719699859619, -0.22684065997600555],
|
||||
[-0.0585763081908226, -0.16113176941871643],
|
||||
[-1.0, -1.0]]])
|
||||
op = c_audio.Vol(gain=-0.05, gain_type=utils.GainType.DB)
|
||||
out_mindspore = op(ndarr_in)
|
||||
allclose_nparray(out_mindspore, out_expect, 0.0001, 0.0001)
|
||||
|
||||
ndarr_in = np.array([[[0.09491927176713943, 0.11639882624149323, -0.1725238710641861, -0.18556903302669525],
|
||||
[-0.7140364646911621, 1.6223102807998657, 1.6710518598556519, 0.6019048094749451]],
|
||||
[[-0.8635917901992798, -0.31538113951683044, -0.2209240198135376, 1.3067045211791992],
|
||||
[-2.0922982692718506, 0.6822009682655334, 0.20066820085048676, 0.006392406765371561]]])
|
||||
# cal from benchmark
|
||||
out_expect = np.array([[[0.042449187487363815, 0.05205513536930084, -0.07715501636266708, -0.08298899233341217],
|
||||
[-0.31932681798934937, 0.7255191802978516, 0.7473170757293701, 0.2691799998283386]],
|
||||
[[-0.38620999455451965, -0.14104272425174713, -0.09880022704601288, 0.5843760371208191],
|
||||
[-0.935704231262207, 0.30508953332901, 0.0897415429353714, 0.0028587712440639734]]])
|
||||
op = c_audio.Vol(gain=0.2, gain_type=utils.GainType.POWER)
|
||||
out_mindspore = op(ndarr_in)
|
||||
allclose_nparray(out_mindspore, out_expect, 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_func_vol_pipeline():
|
||||
""" mindspore pipeline mode normal testcase:vol op"""
|
||||
|
||||
logger.info("test vol op with gain_type='power'")
|
||||
data = np.array([[[0.7012, 0.2500, 0.0108],
|
||||
[0.3617, 0.6367, 0.6096]]]).astype(np.float32)
|
||||
out_expect = np.array([[1.0000, 0.7906, 0.0342],
|
||||
[1.0000, 1.0000, 1.0000]])
|
||||
data1 = ds.NumpySlicesDataset(data, column_names=["multi_dimensional_data"])
|
||||
transforms = [c_audio.Vol(gain=10, gain_type=utils.GainType.POWER)]
|
||||
data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
|
||||
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||
out_put = item["multi_dimensional_data"]
|
||||
allclose_nparray(out_put, out_expect, 0.0001, 0.0001)
|
||||
|
||||
logger.info("test vol op with gain_type='amplitude' and datatype='float64'")
|
||||
data = np.array([[[0.9342139979247938, 0.613965955965896, 0.5356328030249583, 0.589909976354571],
|
||||
[0.7301220295167696, 0.31194499547960186, 0.3982210622160919, 0.20984374897512215],
|
||||
[0.18619300588033616, 0.9443723899839336, 0.7395507950492876, 0.4904588086175671]]])
|
||||
data = data.astype(np.float64)
|
||||
out_expect = np.array([[0.18684279918670654, 0.12279318571090699, 0.10712655782699586, 0.1179819941520691],
|
||||
[0.1460244059562683, 0.062388998270034794, 0.07964421510696412, 0.04196875095367432],
|
||||
[0.03723860085010529, 0.1888744831085205, 0.14791015386581421, 0.09809176325798036]])
|
||||
data1 = ds.NumpySlicesDataset(data, column_names=["multi_dimensional_data"])
|
||||
transforms = [c_audio.Vol(gain=0.2, gain_type=utils.GainType.AMPLITUDE)]
|
||||
data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
|
||||
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||
out_put = item["multi_dimensional_data"]
|
||||
allclose_nparray(out_put, out_expect, 0.0001, 0.0001)
|
||||
|
||||
logger.info("test vol op with gain_type='db'")
|
||||
data = np.array([[[0.1302, 0.5908, 0.1225, 0.7044],
|
||||
[0.6405, 0.6540, 0.9908, 0.8605],
|
||||
[0.7023, 0.0115, 0.8790, 0.5806]]]).astype(np.float32)
|
||||
out_expect = np.array([[0.1096, 0.4971, 0.1031, 0.5927],
|
||||
[0.5389, 0.5503, 0.8336, 0.7240],
|
||||
[0.5909, 0.0097, 0.7396, 0.4885]])
|
||||
data1 = ds.NumpySlicesDataset(data, column_names=["multi_dimensional_data"])
|
||||
transforms = [c_audio.Vol(gain=-1.5, gain_type=utils.GainType.DB)]
|
||||
data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
|
||||
for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
|
||||
out_put = item["multi_dimensional_data"]
|
||||
allclose_nparray(out_put, out_expect, 0.0001, 0.0001)
|
||||
|
||||
|
||||
def test_vol_invalid_input():
|
||||
def test_invalid_input(test_name, gain, gain_type, error, error_msg):
|
||||
logger.info("Test Vol with invalid input: {0}".format(test_name))
|
||||
with pytest.raises(error) as error_info:
|
||||
c_audio.Vol(gain, gain_type)
|
||||
assert error_msg in str(error_info.value)
|
||||
|
||||
test_invalid_input("invalid gain value when gain_type equals 'power'", -1.5, utils.GainType.POWER, ValueError,
|
||||
"Input gain is not within the required interval of (0, 16777216].")
|
||||
test_invalid_input("invalid gain value when gain_type equals 'amplitude'", -1.5, utils.GainType.AMPLITUDE,
|
||||
ValueError, "Input gain is not within the required interval of [0, 16777216].")
|
||||
test_invalid_input("invalid gain value when gain_type equals 'amplitude'", 1.5, "TEST", TypeError,
|
||||
"Argument gain_type with value TEST is not of type [<enum 'GainType'>], but got <class 'str'>.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_func_vol_eager()
|
||||
test_func_vol_pipeline()
|
||||
test_vol_invalid_input()
|
Loading…
Reference in New Issue